From 4f3828797f41cdd2e3118b1cd45c654491ff6c2d Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Fri, 19 Feb 2016 15:33:56 +1100 Subject: Merge pull request #2505 from wiredtiger/wt-2411 WT-2411 Drop the checkpoint lock when LSM is draining its queue. (cherry picked from commit 2b78ad8a6bd446d06d1a453198b68befed57fbe5) --- src/include/schema.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/include/schema.h b/src/include/schema.h index 1e4e4fde81f..f93c596e2ca 100644 --- a/src/include/schema.h +++ b/src/include/schema.h @@ -174,6 +174,8 @@ struct __wt_table { */ #define WT_WITHOUT_LOCKS(session, op) do { \ WT_CONNECTION_IMPL *__conn = S2C(session); \ + bool __checkpoint_locked = \ + F_ISSET(session, WT_SESSION_LOCKED_CHECKPOINT); \ bool __handle_locked = \ F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST); \ bool __table_locked = \ @@ -192,7 +194,15 @@ struct __wt_table { F_CLR(session, WT_SESSION_LOCKED_SCHEMA); \ __wt_spin_unlock(session, &__conn->schema_lock); \ } \ + if (__checkpoint_locked) { \ + F_CLR(session, WT_SESSION_LOCKED_CHECKPOINT); \ + __wt_spin_unlock(session, &__conn->checkpoint_lock); \ + } \ op; \ + if (__checkpoint_locked) { \ + __wt_spin_lock(session, &__conn->checkpoint_lock); \ + F_SET(session, WT_SESSION_LOCKED_CHECKPOINT); \ + } \ if (__schema_locked) { \ __wt_spin_lock(session, &__conn->schema_lock); \ F_SET(session, WT_SESSION_LOCKED_SCHEMA); \ -- cgit v1.2.1 From eaa7b5f0fcc62f356c33a2c56f45b609a73ca5dd Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 20 Apr 2016 17:02:50 +1000 Subject: Merge pull request #2670 from wiredtiger/wt-2566 WT-2566 Lock/unlock operations should imply memory barriers. 
(cherry picked from commit 05cfbc26c2ab2099d7c98080a79ae67ea531c24f) --- src/include/mutex.i | 12 ++++++++++++ src/support/mtx_rw.c | 37 ++++++++++++++++++++++++++----------- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/src/include/mutex.i b/src/include/mutex.i index 52250f84ab3..65956c13c08 100644 --- a/src/include/mutex.i +++ b/src/include/mutex.i @@ -306,6 +306,12 @@ __wt_fair_lock(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock) __wt_sleep(0, 10); } + /* + * Applications depend on a barrier here so that operations holding the + * lock see consistent data. + */ + WT_READ_BARRIER(); + return (0); } @@ -318,6 +324,12 @@ __wt_fair_unlock(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock) { WT_UNUSED(session); + /* + * Ensure that all updates made while the lock was held are visible to + * the next thread to acquire the lock. + */ + WT_WRITE_BARRIER(); + /* * We have exclusive access - the update does not need to be atomic. */ diff --git a/src/support/mtx_rw.c b/src/support/mtx_rw.c index b6876cdfbdc..dbf73bb4f13 100644 --- a/src/support/mtx_rw.c +++ b/src/support/mtx_rw.c @@ -183,6 +183,8 @@ __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) session, WT_VERB_MUTEX, "rwlock: readlock %s", rwlock->name)); WT_STAT_FAST_CONN_INCR(session, rwlock_read); + WT_DIAGNOSTIC_YIELD; + l = &rwlock->rwlock; /* @@ -213,6 +215,12 @@ __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) */ ++l->s.readers; + /* + * Applications depend on a barrier here so that operations holding the + * lock see consistent data. + */ + WT_READ_BARRIER(); + return (0); } @@ -306,6 +314,12 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) __wt_sleep(0, 10); } + /* + * Applications depend on a barrier here so that operations holding the + * lock see consistent data. 
+ */ + WT_READ_BARRIER(); + return (0); } @@ -316,31 +330,32 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) int __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) { - wt_rwlock_t *l, copy; + wt_rwlock_t *l, new; WT_RET(__wt_verbose( session, WT_VERB_MUTEX, "rwlock: writeunlock %s", rwlock->name)); + /* + * Ensure that all updates made while the lock was held are visible to + * the next thread to acquire the lock. + */ + WT_WRITE_BARRIER(); + l = &rwlock->rwlock; - copy = *l; + new = *l; /* * We're the only writer of the writers/readers fields, so the update * does not need to be atomic; we have to update both values at the * same time though, otherwise we'd potentially race with the thread * next granted the lock. - * - * Use a memory barrier to ensure the compiler doesn't mess with these - * instructions and rework the code in a way that avoids the update as - * a unit. */ - WT_BARRIER(); - - ++copy.s.writers; - ++copy.s.readers; + ++new.s.writers; + ++new.s.readers; + l->i.wr = new.i.wr; - l->i.wr = copy.i.wr; + WT_DIAGNOSTIC_YIELD; return (0); } -- cgit v1.2.1 From 2efe896a63d36c49b18d7fba093b3052d565cb55 Mon Sep 17 00:00:00 2001 From: David Hows Date: Fri, 23 Dec 2016 11:22:20 +1100 Subject: Bump release version on develop to 2.9.2 --- README | 6 +++--- RELEASE_INFO | 2 +- build_posix/aclocal/version-set.m4 | 4 ++-- build_posix/aclocal/version.m4 | 2 +- dist/package/wiredtiger.spec | 2 +- src/docs/upgrading.dox | 4 ++++ 6 files changed, 12 insertions(+), 8 deletions(-) diff --git a/README b/README index 4b25a42f4eb..f7edae2835d 100644 --- a/README +++ b/README @@ -1,6 +1,6 @@ -WiredTiger 2.9.1: (December 23, 2016) +WiredTiger 2.9.2: (December 23, 2016) -This is version 2.9.1 of WiredTiger. +This is version 2.9.2 of WiredTiger. 
WiredTiger release packages and documentation can be found at: @@ -8,7 +8,7 @@ WiredTiger release packages and documentation can be found at: The documentation for this specific release can be found at: - http://source.wiredtiger.com/2.9.1/index.html + http://source.wiredtiger.com/2.9.2/index.html The WiredTiger source code can be found at: diff --git a/RELEASE_INFO b/RELEASE_INFO index 502b17188ce..b7145aa2cb3 100644 --- a/RELEASE_INFO +++ b/RELEASE_INFO @@ -1,6 +1,6 @@ WIREDTIGER_VERSION_MAJOR=2 WIREDTIGER_VERSION_MINOR=9 -WIREDTIGER_VERSION_PATCH=1 +WIREDTIGER_VERSION_PATCH=2 WIREDTIGER_VERSION="$WIREDTIGER_VERSION_MAJOR.$WIREDTIGER_VERSION_MINOR.$WIREDTIGER_VERSION_PATCH" WIREDTIGER_RELEASE_DATE=`date "+%B %e, %Y"` diff --git a/build_posix/aclocal/version-set.m4 b/build_posix/aclocal/version-set.m4 index b3f2c50fad8..c677ce41192 100644 --- a/build_posix/aclocal/version-set.m4 +++ b/build_posix/aclocal/version-set.m4 @@ -2,8 +2,8 @@ dnl build by dist/s_version VERSION_MAJOR=2 VERSION_MINOR=9 -VERSION_PATCH=1 -VERSION_STRING='"WiredTiger 2.9.1: (December 23, 2016)"' +VERSION_PATCH=2 +VERSION_STRING='"WiredTiger 2.9.2: (December 23, 2016)"' AC_SUBST(VERSION_MAJOR) AC_SUBST(VERSION_MINOR) diff --git a/build_posix/aclocal/version.m4 b/build_posix/aclocal/version.m4 index a75ba93e405..29782a22f82 100644 --- a/build_posix/aclocal/version.m4 +++ b/build_posix/aclocal/version.m4 @@ -1,2 +1,2 @@ dnl WiredTiger product version for AC_INIT. Maintained by dist/s_version -2.9.1 +2.9.2 diff --git a/dist/package/wiredtiger.spec b/dist/package/wiredtiger.spec index ca88f76b06b..aacdf327c98 100644 --- a/dist/package/wiredtiger.spec +++ b/dist/package/wiredtiger.spec @@ -1,5 +1,5 @@ Name: wiredtiger -Version: 2.9.1 +Version: 2.9.2 Release: 1%{?dist} Summary: WiredTiger data storage engine diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index fea0a4a8364..b73bd984abd 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -1,5 +1,9 @@ /*! 
@page upgrading Upgrading WiredTiger applications +@section version_291 Upgrading to Version 2.9.2 +
+
+ @section version_291 Upgrading to Version 2.9.1
-- cgit v1.2.1 From 190acd85f1183b11b2b2d9f90e6272f5a58fce71 Mon Sep 17 00:00:00 2001 From: David Hows Date: Fri, 23 Dec 2016 11:53:07 +1100 Subject: Fix 2.9.2 documentation stub issues --- src/docs/upgrading.dox | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index b73bd984abd..af612fb0aad 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -1,8 +1,6 @@ /*! @page upgrading Upgrading WiredTiger applications -@section version_291 Upgrading to Version 2.9.2 -
-
+@section version_292 Upgrading to Version 2.9.2 @section version_291 Upgrading to Version 2.9.1 -- cgit v1.2.1 From 20348a7afc0fb5a8dc888fd4c9885f07d70109ee Mon Sep 17 00:00:00 2001 From: Sulabh Mahajan Date: Fri, 23 Dec 2016 14:44:23 +1100 Subject: WT-2994 Create documentation describing page sizes and relationships (#3204) --- src/docs/Doxyfile | 8 + src/docs/file-formats.dox | 6 +- src/docs/programming.dox | 3 +- src/docs/spell.ok | 1 + src/docs/tune-compression.dox | 62 ----- src/docs/tune-page-size-and-comp.dox | 426 +++++++++++++++++++++++++++++++++++ src/docs/tune-page-sizes.dox | 142 ------------ src/docs/upgrading.dox | 2 +- 8 files changed, 440 insertions(+), 210 deletions(-) delete mode 100644 src/docs/tune-compression.dox create mode 100644 src/docs/tune-page-size-and-comp.dox delete mode 100644 src/docs/tune-page-sizes.dox diff --git a/src/docs/Doxyfile b/src/docs/Doxyfile index 69e9716b425..3d8c46962f1 100644 --- a/src/docs/Doxyfile +++ b/src/docs/Doxyfile @@ -216,11 +216,19 @@ ALIASES = "notyet{1}=Note: "\1" not yet supported in Wired "hrow{3}=\1\2\3" \ "hrow{4}=\1\2\3\4" \ "hrow{5}=\1\2\3\4\5" \ + "hrow{6}=\1\2\3\4\5\6" \ + "hrow{7}=\1\2\3\4\5\6\7" \ + "hrow{8}=\1\2\3\4\5\6\7\8" \ + "hrow{9}=\1\2\3\4\5\6\7\8\9" \ "row{1}=\1" \ "row{2}=\1\2" \ "row{3}=\1\2\3" \ "row{4}=\1\2\3\4" \ "row{5}=\1\2\3\4\5" \ + "row{6}=\1\2\3\4\5\6" \ + "row{7}=\1\2\3\4\5\6\7" \ + "row{8}=\1\2\3\4\5\6\7\8" \ + "row{9}=\1\2\3\4\5\6\7\8\9" \ "configstart{2}=@param config\n Configuration string, see @ref config_strings. Permitted values:\n @hrow{Name,Effect,Values}" \ "config{3}= @row{\1,\2,\3}" \ "configend=
" \ diff --git a/src/docs/file-formats.dox b/src/docs/file-formats.dox index d8990aca7a6..21dc4580bc2 100644 --- a/src/docs/file-formats.dox +++ b/src/docs/file-formats.dox @@ -110,7 +110,7 @@ considered. (See @subpage_single huffman for details.) compressing blocks of the backing object's file. The cost is additional CPU and memory use when reading and writing pages to disk. Note the additional CPU cost of block compression can be high, and should be -considered. (See @x_ref compression_formats for details.) +considered. (See @x_ref compression_considerations for details.) Block compression is disabled by default. @@ -146,7 +146,7 @@ Huffman encoding can be high, and should be considered. compressing blocks of the backing object's file. The cost is additional CPU and memory use when reading and writing pages to disk. Note the additional CPU cost of block compression can be high, and should be -considered. (See @x_ref compression_formats for details.) +considered. (See @x_ref compression_considerations for details.) Block compression is disabled by default. @@ -157,7 +157,7 @@ compression: block compression. compressing blocks of the backing object's file. The cost is additional CPU and memory use when reading and writing pages to disk. Note the additional CPU cost of block compression can be high, and should be -considered. (See @x_ref compression_formats for details.) +considered. (See @x_ref compression_considerations for details.) Block compression is disabled by default. diff --git a/src/docs/programming.dox b/src/docs/programming.dox index 81e612e8ee8..aa76bef4614 100644 --- a/src/docs/programming.dox +++ b/src/docs/programming.dox @@ -66,14 +66,13 @@ each of which is ordered by one or more columns. - @subpage_single wtstats

- @subpage_single tune_memory_allocator -- @subpage_single tune_page_sizes +- @subpage_single tune_page_size_and_comp - @subpage_single tune_cache - @subpage_single tune_bulk_load - @subpage_single tune_cursor_persist - @subpage_single tune_read_only - @subpage_single tune_durability - @subpage_single tune_checksum -- @subpage_single tune_compression - @subpage_single tune_file_alloc - @subpage_single tune_system_buffer_cache - @subpage_single tune_transparent_huge_pages diff --git a/src/docs/spell.ok b/src/docs/spell.ok index 2413cbc93fb..f87f24cef5c 100644 --- a/src/docs/spell.ok +++ b/src/docs/spell.ok @@ -51,6 +51,7 @@ LIBS LLVM LOGREC LRVv +LRU LSB LSM LZ diff --git a/src/docs/tune-compression.dox b/src/docs/tune-compression.dox deleted file mode 100644 index 8db2151aa76..00000000000 --- a/src/docs/tune-compression.dox +++ /dev/null @@ -1,62 +0,0 @@ -/*! @page tune_compression Compression - -WiredTiger includes a number of optional compression techniques. Configuring -compression generally decreases on-disk and in-memory resource requirements -and the amount of I/O, and increases CPU cost when data are read and written. - -Configuring compression may change application throughput. For example, -in applications using solid-state drives (where I/O is less expensive), -turning off compression may increase application performance by reducing -CPU costs; in applications where I/O costs are more expensive, turning on -compression may increase application performance by reducing the overall -number of I/O operations. - -An example of turning on row-store key prefix compression: - -@snippet ex_all.c Configure key prefix compression on - -An example of turning on row-store or column-store dictionary compression: - -@snippet ex_all.c Configure dictionary compression on - -@section compression_formats Block Compression Formats -WiredTiger provides two methods of compressing your data when using block -compression: the raw and noraw methods. 
These methods change how WiredTiger -works to fit data into the blocks that are stored on disk. - -@subsection noraw_compression Noraw Compression -Noraw compression is the traditional compression model where a fixed -amount of data is given to the compression system, then turned into a -compressed block of data. The amount of data chosen to compress is the -data needed to fill the uncompressed block. Thus when compressed, the block will -be smaller than the normal data size and the sizes written to disk will often -vary depending on how compressible the data being stored is. Algorithms -using noraw compression include zlib-noraw, lz4-noraw and snappy. - -@subsection raw_compression Raw Compression -WiredTiger's raw compression takes advantage of compressors that provide a -streaming compression API. Using the streaming API WiredTiger will try to fit -as much data as possible into one block. This means that blocks created -with raw compression should be of similar size. Using a streaming compression -method should also make for less overhead in compression, as the setup and -initial work for compressing is done fewer times compared to the amount of -data stored. Algorithms using raw compression include zlib, lz4. - -@subsection to_raw_or_noraw Choosing between Raw and Noraw Compression -When looking at which compression method to use the biggest consideration is -that raw compression will normally provide higher compression levels while -using more CPU for compression. - -An additional consideration is that raw compression may provide a performance -advantage in workloads where data is accessed sequentially. That is because -more data is generally packed into each block on disk. Conversely, noraw -compression may perform better for workloads with random access patterns -because each block will tend to be smaller and require less work to read and -decompress. - -See @ref file_formats_compression for more information on available -compression techniques. 
- -See @ref compression for information on how to configure and enable compression. - - */ diff --git a/src/docs/tune-page-size-and-comp.dox b/src/docs/tune-page-size-and-comp.dox new file mode 100644 index 00000000000..70e9875bcc4 --- /dev/null +++ b/src/docs/tune-page-size-and-comp.dox @@ -0,0 +1,426 @@ +/*! @page tune_page_size_and_comp Tuning page size and compression + +This document aims to explain the role played by different page sizes in +WiredTiger. It also details motivation behind an application wanting to modify +these page sizes from their default values and the procedure to do so. +Applications commonly configure page sizes based on their workload's typical key +and value size. Once a page size has been chosen, appropriate defaults for the +other configuration values are derived by WiredTiger from the page sizes, and +relatively few applications will need to modify the other page and key/value +size configuration options. WiredTiger also offers several compression options +that have an impact on the size of the data both in-memory and on-disk. Hence +while selecting page sizes, an application must also look at its desired +compression needs. Since the data and workload for a table differs from one +table to another in the database, an application can choose to set page sizes +and compression options on a per-table basis. + +@section data_life_cycle Data life cycle +Before detailing each page size, here is a review of how data gets stored inside +WiredTiger: + - WiredTiger uses the physical disks to store data durably, creating on-disk +files for the tables in the database directory. It also caches the portion of +the table being currently accessed by the application for reading or writing in +main memory. + - WiredTiger maintains a table's data in memory using a data structure called a +B-Tree ( +B+ Tree to be specific), +referring to the nodes of a B-Tree as pages. Internal pages carry only keys. The +leaf pages store both keys and values. 
+ - The format of the in-memory pages is not the same as the format of the +on-disk pages. Therefore, the in-memory pages regularly go through a process +called reconciliation to create data structures appropriate for storage on the +disk. These data structures are referred to as on-disk pages. An application can +set a maximum size separately for the internal and leaf on-disk pages otherwise +WiredTiger uses a default value. If reconciliation of an in-memory page is +leading to an on-disk page size greater than this maximum, WiredTiger creates +multiple smaller on-disk pages. + - A component of WiredTiger called the Block Manager divides the on-disk pages +into smaller chunks called blocks, which then get written to the disk. The size +of these blocks is defined by a parameter called allocation_size, which is the +underlying unit of allocation for the file the data gets stored in. An +application might choose to have data compressed before it gets stored to disk +by enabling block compression. + - A database’s tables are usually much larger than the main memory available. +Not all of the data can be kept in memory at any given time. A process called +eviction takes care of making space for new data by freeing the memory of data +infrequently accessed. An eviction server regularly finds in-memory pages that +have not been accessed in a while (following an LRU algorithm). Several +background eviction threads continuously process these pages, reconcile them to +disk and remove them from the main memory. + - When an application does an insert or an update of a key/value pair, the +associated key is used to refer to an in-memory page. In the case of this page +not being in memory, appropriate on-disk page(s) are read and an in-memory page +constructed (the opposite of reconciliation). A data structure is maintained on +every in-memory page to store any insertions or modifications to the data done +on that page. 
As more and more data gets written to this page, the page’s memory +footprint keeps growing. + - An application can choose to set the maximum size a page is allowed to grow +in-memory. A default size is set by WiredTiger if the application doesn't +specify one. To keep page management efficient, as a page grows larger in-memory +and approaches this maximum size, if possible, it is split into smaller +in-memory pages. + - When doing an insert or an update, if a page grows larger than the maximum, +the application thread is used to forcefully evict this page. This is done to +split the growing page into smaller in-memory pages and reconcile them into +on-disk pages. Once written to the disk they are removed from the main memory, +making space for more data to be written. When an application gets involved in +forced eviction, it might take longer than usual to do these inserts and +updates. It is not always possible to (force) evict a page from memory and this +page can temporarily grow larger in size than the configured maximum. This page +then remains marked to be evicted and reattempts are made as the application +puts more data in it. + +@section configurable_page_struct Configurable page structures in WiredTiger +There are three page sizes that the user can configure: + 1. The maximum page size of any type of in-memory page in the WiredTiger cache, +memory_page_max. + 2. The maximum size of the on-disk page for an internal page, internal_page_max. + 3. The maximum size of the on-disk leaf page, leaf_page_max. + +There are additional configuration settings that tune more esoteric and +specialized data. Those are included for completeness but are rarely changed. + +@subsection memory_page_max memory_page_max +The maximum size a table’s page is allowed to grow to in memory before being +reconciled to disk. 
+ - An integer, with acceptable values between 512B and 10TB + - Default size: 5 MB + - Additionally constrained by the condition: + leaf_page_max <= memory_page_max <= cache_size/10 + - Motivation to tune the value: +\n memory_page_max is significant for applications wanting to tune for +consistency in write intensive workloads. + - This is the parameter to start with for tuning and trying different values +to find the correct balance between overall throughput and individual operation +latency for each table. + - Splitting a growing in-memory page into smaller pages and reconciliation +both require exclusive access to the page which makes an application's write +operations wait. Having a large memory_page_max means that the pages will need +to be split and reconciled less often. But when that happens, the duration that +an exclusive access to the page is required is longer, increasing the latency of +an application’s insert or update operations. Conversely, having a smaller +memory_page_max reduces the time taken for splitting and reconciling the pages, +but causes it to happen more frequently, forcing more frequent but shorter +exclusive accesses to the pages. + - Applications should choose the memory_page_max value considering the +trade-off between frequency of exclusive access to the pages (for reconciliation +or splitting pages into smaller pages) versus the duration that the exclusive +access is required. + - Configuration: +\n Specified as memory_page_max configuration option to WT_SESSION::create(). An +example of such a configuration string is as follows: + +

+     "key_format=S,value_format=S,memory_page_max=10MB"
+
+ +@subsection internal_page_max internal_page_max +The maximum page size for the reconciled on-disk internal pages of the B-Tree, +in bytes. When an internal page grows past this size, it splits into multiple +pages. + - An integer, with acceptable values between 512B and 512MB + - Default size: 4 KB (*appropriate for applications with relatively small keys) + - Additionally constrained by the condition: the size must be a multiple of the +allocation size + - Motivation to tune the value: +\n internal_page_max is significant for applications wanting to avoid excessive +L2 cache misses while searching the tree. + - Recall that only keys are stored on internal pages, so the type and size of +the key values for a table help drive the setting for this parameter. + - Should be sized to fit into on-chip caches. + - Applications doing full-table scans with out-of-memory workloads might +increase internal_page_max to transfer more data per I/O. + - Influences the shape of the B-Tree, i.e. depth and the number of children +each page in B-Tree has. To iterate to the desired key/value pair in the B-Tree, +WiredTiger has to binary search the key-range in a page to determine the child +page to proceed to and continue down the depth until it reaches the correct leaf +page. Having an unusually deep B-Tree, or having too many children per page can +negatively impact time taken to iterate the B-Tree, slowing down the application. +The number of children per page and, hence, the tree depth depends upon the +number of keys that can be stored in an internal page, which is +internal_page_max divided by key size. Applications should choose an appropriate +internal_page_max size that avoids the B-Tree from getting too deep. + - Configuration: +\n Specified as internal_page_max configuration option to WT_SESSION::create(). +An example of such a configuration string is as follows: + +
+     "key_format=S,value_format=S,internal_page_max=16KB,leaf_page_max=1MB"
+
+ +@subsection leaf_page_max leaf_page_max +The maximum page size for the reconciled on-disk leaf pages of the B-Tree, in +bytes. When a leaf page grows past this size, it splits into multiple pages. + - An integer, with acceptable values between 512B and 512MB + - Default size: 32 KB (*appropriate for applications with relatively small keys +and values) + - Additionally constrained by the condition: must be a multiple of the +allocation size + - Motivation to tune the value: +\n leaf_page_max is significant for applications wanting to maximize sequential +data transfer from a storage device. + - Should be sized to maximize I/O performance (when reading from disk, it is +usually desirable to read a large amount of data, assuming some locality of +reference in the application's access pattern). + - Applications doing full-table scans through out-of-cache workloads might +increase leaf_page_max to transfer more data per I/O. + - Applications focused on read/write amplification might decrease the page +size to better match the underlying storage block size. + - Configuration: +\n Specified as leaf_page_max configuration option to WT_SESSION::create(). An +example of such a configuration string is as follows: + +
+     "key_format=S,value_format=S,internal_page_max=16KB,leaf_page_max=1MB"
+
+ +The following configuration items are rarely used. They are described +for completeness: + +@subsection allocation_size allocation_size +This is the underlying unit of allocation for the file. As the unit of file +allocation, it sets the minimum page size and how much space is wasted when +storing small amounts of data and overflow items. + - an integer between 512B and 128 MB + - must be a power-of-two + - default : 4 KB + - Motivation to tune the value: +\n Most applications should not need to tune the allocation size. + - To be compatible with virtual memory page sizes and direct I/O requirements +on the platform (4KB for most common server platforms) + - Smaller values decrease the file space required by overflow items. + - For example, if the allocation size is set to 4KB, an overflow item of +18,000 bytes requires 5 allocation units and wastes about 2KB of space. If the +allocation size is 16KB, the same overflow item would waste more than 10KB. + - Configuration: +\n Specified as allocation_size configuration option to WT_SESSION::create(). An +example of such a configuration string is as follows: + +
+     "key_format=S,value_format=S,allocation_size=4KB"
+
+ +@subsection key_val_max internal/leaf key/value max + - Overflow items +\n Overflow items are keys and values too large to easily store on a page. Overflow +items are stored separately in the file from the page where the item logically +appears, and so reading or writing an overflow item is more expensive than an +on-page item, normally requiring additional I/O. Additionally, overflow values +are not cached in memory. This means overflow items won't affect the caching +behavior of the application. It also means that each time an overflow value is +read, it is re-read from disk. + - internal_key_max +\n The largest key stored in an internal page, in bytes. If set, keys larger than +the specified size are stored as overflow items. + - The default and the maximum allowed value are both one-tenth the size of a +newly split internal page. + - leaf_key_max +\n The largest key stored in a leaf page, in bytes. If set, keys larger than the +specified size are stored as overflow items. + - The default value is one-tenth the size of a newly split leaf page. + - leaf_value_max +\n The largest value stored in a leaf page, in bytes. If set, values larger than +the specified size are stored as overflow items. + - The default is one-half the size of a newly split leaf page. + - If the size is larger than the maximum leaf page size, the page size is +temporarily ignored when large values are written. + - Motivation to tune the values: +\n Most applications should not need to tune the maximum key and value sizes. +Applications requiring a small page size, but also having latency concerns such +that the additional work to retrieve an overflow item is unacceptable, may find +modifying these values useful. +\n Since overflow items are separately stored in the on-disk file, aren't cached +and require additional I/O to access (read or write), applications should avoid +creating overflow items.
+ - Since page sizes also determine the default size of overflow items, i.e., +keys and values too large to easily store on a page, they can be configured to +avoid performance penalties working with overflow items: + - Applications with large keys and values, and concerned with latency, +might increase the page size to avoid creating overflow items, in order to avoid +the additional cost of retrieving them. + - Applications with large keys and values, doing random searches, might +decrease the page size to avoid wasting cache space on overflow items that +aren't likely to be needed. + - Applications with large keys and values, doing table scans, might +increase the page size to avoid creating overflow items, as the overflow items +must be read into memory in all cases, anyway. + - internal_key_max, leaf_key_max and leaf_value_max configuration values +allow applications to change the size at which a key or value will be treated +as an overflow item. + - Most applications should not need to tune the maximum key and value +sizes. + - The value of internal_key_max is relative to the maximum internal page +size. Because the number of keys on an internal page determines the depth of the +tree, the internal_key_max value can only be adjusted within a certain range, +and the configured value will be automatically adjusted by WiredTiger, if +necessary, to ensure a reasonable number of keys fit on an internal page. + - The values of leaf_key_max and leaf_value_max are not relative to the +maximum leaf page size. If either is larger than the maximum page size, the page +size will be ignored when the larger keys and values are being written, and a +larger page will be created as necessary. + - Configuration: +\n Specified as internal_key_max, leaf_key_max and leaf_value_max configuration +options to WT_SESSION::create(). An example of configuration string for a large +leaf overflow value: + +
+     "key_format=S,value_format=S,leaf_page_max=16KB,leaf_value_max=256KB"
+
+ +@subsection split_pct split_pct (split percentage) +The size (specified as percentage of internal/leaf page_max) at which the +reconciled page must be split into multiple smaller pages before being sent for +compression and then be written to the disk. If the reconciled page can fit into +a single on-disk page without the page growing beyond its set max size, +split_pct is ignored and the page isn't split. + - an integer between 25 and 100 + - default : 75 + - Motivation to tune the value: +\n Most applications should not need to tune the split percentage size. + - This value should be selected to avoid creating a large number of tiny +pages or repeatedly splitting whenever new entries are inserted. +\n For example, if the maximum page size is 1MB, a split_pct value of 10% +would potentially result in creating a large number of 100KB pages, which may +not be optimal for future I/O. Or, if the maximum page size is 1MB, a split_pct +value of 90% would potentially result in repeatedly splitting pages as the split +pages grow to 1MB over and over. The default value for split_pct is 75%, +intended to keep large pages relatively large, while still giving split pages +room to grow. + - Configuration: +\n Specified as split_pct configuration option to WT_SESSION::create(). An +example of such a configuration string is as follows: + +
+     "key_format=S,value_format=S,split_pct=60"
+
+ +@section compression_considerations Compression considerations +WiredTiger compresses data at several stages to preserve memory and disk space. +Applications can configure these different compression algorithms to tailor +their requirements between memory, disk and CPU consumption. Compression +algorithms other than block compression work by modifying how the keys and +values are represented, and hence reduce data size in-memory and on-disk. Block +compression, on the other hand, compresses the data in its binary representation +while saving it on the disk. + +Configuring compression may change application throughput. For example, in +applications using solid-state drives (where I/O is less expensive), turning +off compression may increase application performance by reducing CPU costs; in +applications where I/O costs are more expensive, turning on compression may +increase application performance by reducing the overall number of I/O +operations. + +WiredTiger uses some internal algorithms to compress the amount of data stored +that are not configurable, but always on. For example, run-length encoding reduces the +size requirement by storing sequential, duplicate values in the store only a +single time (with an associated count). + +Different compression options available with WiredTiger: + - Key-prefix + - Reduces the size requirement by storing any identical key prefix only once +per page. The cost is additional CPU and memory when operating on the in-memory +tree. Specifically, reverse sequential cursor movement (but not forward) through +a prefix-compressed page or the random lookup of a key/value pair will allocate +sufficient memory to hold some number of uncompressed keys. So, for example, if +key prefix compression only saves a small number of bytes per key, the +additional memory cost of instantiating the uncompressed key may mean prefix +compression is not worthwhile. 
Further, in cases where the on-disk cost is the +primary concern, block compression may mean prefix compression is less useful. + - Configuration: +\n Specified as prefix_compression configuration option to +WT_SESSION::create(). Applications may limit the use of prefix compression by +configuring the minimum number of bytes that must be gained before prefix +compression is used with prefix_compression_min configuration option. An example +of such a configuration string is as follows: + +
+          "key_format=S,value_format=S,prefix_compression=true,prefix_compression_min=7"
+
+ + - Dictionary + - Reduces the size requirement by storing any identical value only once per +page. + - Configuration: +\n Specified as dictionary configuration option to +WT_SESSION::create(), which specifies the maximum number of unique values +remembered in the B-Tree row-store leaf page value dictionary. An example of +such a configuration string is as follows: + +
+          "key_format=S,value_format=S,dictionary=1000"
+
+ + - Huffman + - Reduces the size requirement by compressing individual key/value items, and +can be separately configured for either or both keys and values. The additional CPU +cost of Huffman encoding can be high, and should be considered. (See Huffman +Encoding for details.) + - Configuration: +\n Specified as huffman_key and/or huffman_value configuration option to +WT_SESSION::create(). These options can take values of "english" (to use a +built-in English language frequency table), "utf8" or "utf16" (to +use a custom utf8 or utf16 symbol frequency table file). An example of such a +configuration string is as follows: + +
+          "key_format=S,value_format=S,huffman_key=english,huffman_value=english"
+
+ + - Block Compression + - Reduces the size requirement of on-disk objects by compressing blocks of +the backing object's file. The additional CPU cost of block compression can be +high, and should be considered. When block compression has been configured, +configured page sizes will not match the actual size of the page on disk. + - WiredTiger provides two methods of compressing your data when using block +compression: the raw and noraw methods. These methods change how WiredTiger +works to fit data into the blocks that are stored on disk. Applications needing +to write specific sized blocks may want to consider implementing a +WT_COMPRESSOR::compress_raw function. + - Noraw compression: +\n A fixed amount of data is given to the compression system, then turned into +a compressed block of data. The amount of data chosen to compress is the data +needed to fill the uncompressed block. Thus when compressed, the block will be +smaller than the normal data size and the sizes written to disk will often vary +depending on how compressible the data being stored is. Algorithms using noraw +compression include zlib-noraw, lz4-noraw and snappy. +Noraw compression is better suited for workloads with random access patterns +because each block will tend to be smaller and require less work to read and +decompress. + - Raw compression: +\n WiredTiger's raw compression takes advantage of compressors that provide a +streaming compression API. Using the streaming API WiredTiger will try to fit as +much data as possible into one block. This means that blocks created with raw +compression should be of similar size. Using a streaming compression method +should also make for less overhead in compression, as the setup and initial work +for compressing is done fewer times compared to the amount of data stored. +Algorithms using raw compression include zlib, lz4. +Compared to noraw, raw compression provides more compression while using more +CPU. 
Raw compression may provide a performance advantage in workloads where data +is accessed sequentially. That is because more data is generally packed into +each block on disk. + - Configuration: +\n Specified as the block_compressor configuration option to +WT_SESSION::create(). If WiredTiger has builtin support for "lz4", "snappy", +"zlib" or "zstd" compression, these names are available as the value to the +option. An example of such a configuration string is as follows: + +
+          "key_format=S,value_format=S,block_compressor=snappy"
+
+ +See @ref compression for further information on how to configure and enable +different compression options. + +@subsection table_compress Table summarizing compression in WiredTiger + + +@hrow{Compression Type, Supported by row-store, Supported by variable col-store, + Supported by fixed col-store, Default config, Reduces in-mem size, + Reduces on-disk size, CPU and Memory cost} +@row{Key-prefix, yes, no, no, disabled, yes, yes, minor} +@row{Dictionary, yes, yes, no, disabled, yes, yes, minor} +@row{Huffman, yes, yes, no, disabled, yes, yes, can be high} +@row{Block, yes, yes, yes, disabled, no, yes, can be high} +
+ +*/ diff --git a/src/docs/tune-page-sizes.dox b/src/docs/tune-page-sizes.dox deleted file mode 100644 index 130e047a02d..00000000000 --- a/src/docs/tune-page-sizes.dox +++ /dev/null @@ -1,142 +0,0 @@ -/*! @page tune_page_sizes Page and overflow key/value sizes - -There are seven page and key/value size configuration strings: - -- allocation size (\c allocation_size), -- page sizes (\c internal_page_max and \c leaf_page_max), -- key and value sizes (\c internal_key_max, \c leaf_key_max and \c leaf_value_max), and the -- page-split percentage (\c split_pct). - -All seven are specified to the WT_SESSION::create method, in other -words, they are configurable on a per-file basis. - -Applications commonly configure page sizes, based on their workload's -typical key and value size. Once the correct page size has been chosen, -appropriate defaults for the other configuration values are derived from -the page sizes, and relatively few applications will need to modify the -other page and key/value size configuration options. - -An example of configuring page and key/value sizes: - -@snippet ex_all.c Create a table and configure the page size - -@section tune_page_sizes_sizes Page, key and value sizes - -The \c internal_page_max and \c leaf_page_max configuration values -specify a maximum size for Btree internal and leaf pages. That is, when -an internal or leaf page grows past that size, it splits into multiple -pages. Generally, internal pages should be sized to fit into on-chip -caches in order to minimize cache misses when searching the tree, while -leaf pages should be sized to maximize I/O performance (if reading from -disk is necessary, it is usually desirable to read a large amount of -data, assuming some locality of reference in the application's access -pattern). - -The default page size configurations (2KB for \c internal_page_max, 32KB -for \c leaf_page_max), are appropriate for applications with relatively -small keys and values. 
- -- Applications doing full-table scans through out-of-memory workloads -might increase both internal and leaf page sizes to transfer more data -per I/O. -- Applications focused on read/write amplification might decrease the page -size to better match the underlying storage block size. - -When block compression has been configured, configured page sizes will -not match the actual size of the page on disk. Block compression in -WiredTiger happens within the I/O subsystem, and so a page might split -even if subsequent compression would result in a resulting page size -small enough to leave as a single page. In other words, page sizes are -based on in-memory sizes, not on-disk sizes. Applications needing to -write specific sized blocks may want to consider implementing a -WT_COMPRESSOR::compress_raw function. - -The page sizes also determine the default size of overflow items, that -is, keys and values too large to easily store on a page. Overflow items -are stored separately in the file from the page where the item logically -appears, and so reading or writing an overflow item is more expensive -than an on-page item, normally requiring additional I/O. Additionally, -overflow values are not cached in memory. This means overflow items -won't affect the caching behavior of the application, but it also means -that each time an overflow value is read, it is re-read from disk. - -For both of these reasons, applications should avoid creating large -numbers of commonly referenced overflow items. This is especially -important for keys, as keys on internal pages are referenced during -random searches, not just during data retrieval. Generally, -applications should make every attempt to avoid creating overflow keys. - -- Applications with large keys and values, and concerned with latency, -might increase the page size to avoid creating overflow items, in order -to avoid the additional cost of retrieving them. 
- -- Applications with large keys and values, doing random searches, might -decrease the page size to avoid wasting cache space on overflow items -that aren't likely to be needed. - -- Applications with large keys and values, doing table scans, might -increase the page size to avoid creating overflow items, as the overflow -items must be read into memory in all cases, anyway. - -The \c internal_key_max, \c leaf_key_max and \c leaf_value_max -configuration values allow applications to change the size at which a -key or value will be treated as an overflow item. - -The value of \c internal_key_max is relative to the maximum internal -page size. Because the number of keys on an internal page determines -the depth of the tree, the \c internal_key_max value can only be -adjusted within a certain range, and the configured value will be -automatically adjusted by WiredTiger, if necessary to ensure a -reasonable number of keys fit on an internal page. - -The values of \c leaf_key_max and \c leaf_value_max are not relative to -the maximum leaf page size. If either is larger than the maximum page -size, the page size will be ignored when the larger keys and values are -being written, and a larger page will be created as necessary. - -Most applications should not need to tune the maximum key and value -sizes. Applications requiring a small page size, but also having -latency concerns such that the additional work to retrieve an overflow -item is an issue, may find them useful. - -An example of configuring a large leaf overflow value: - -@snippet ex_all.c Create a table and configure a large leaf value max - -@section tune_page_sizes_split_percentage Split percentage - -The \c split_pct configuration string configures the size of a split -page. When a page grows sufficiently large that it must be written as -multiple disk blocks, the newly written block size is \c split_pct -percent of the maximum page size. 
This value should be selected to -avoid creating a large number of tiny pages or repeatedly splitting -whenever new entries are inserted. For example, if the maximum page -size is 1MB, a \c split_pct value of 10% would potentially result in -creating a large number of 100KB pages, which may not be optimal for -future I/O. Or, if the maximum page size is 1MB, a \c split_pct value -of 90% would potentially result in repeatedly splitting pages as the -split pages grow to 1MB over and over. The default value for \c -split_pct is 75%, intended to keep large pages relatively large, while -still giving split pages room to grow. - -Most applications should not need to tune the split percentage size. - -@section tune_page_sizes_allocation_size Allocation size - -The \c allocation_size configuration value is the underlying unit of -allocation for the file. As the unit of file allocation, it sets the -minimum page size and how much space is wasted when storing small -amounts of data and overflow items. For example, if the allocation size -is set to 4KB, an overflow item of 18,000 bytes requires 5 allocation -units and wastes about 2KB of space. If the allocation size is 16KB, -the same overflow item would waste more than 10KB. - -The default allocation size is 4KB, chosen for compatibility with -virtual memory page sizes and direct I/O requirements on common server -platforms. - -Most applications should not need to tune the allocation size; it is -primarily intended for applications coping with the specific -requirements some file systems make to support features like direct I/O. - -*/ diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index af612fb0aad..59a299d48a1 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -321,7 +321,7 @@ be updated. The WT_SESSION::create \c internal_item_max and \c leaf_item_max configuration strings are now deprecated in favor of the \c internal_key_max, \c leaf_key_max, and \c leaf_value_max -configuration strings. 
See @ref tune_page_sizes for more information. +configuration strings. See @ref tune_page_size_and_comp for more information.

-- cgit v1.2.1 From 0605d628342faaaeb5fea7c6f816dda151c412f4 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 22 Dec 2016 22:58:28 -0500 Subject: WT-3092 Quiet a warning from autogen.sh (#3211) --- build_posix/aclocal/options.m4 | 2 +- lang/python/Makefile.am | 3 ++- src/cursor/cur_std.c | 1 + src/include/extern.h | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/build_posix/aclocal/options.m4 b/build_posix/aclocal/options.m4 index 7043430a6d6..bc4b31dfee3 100644 --- a/build_posix/aclocal/options.m4 +++ b/build_posix/aclocal/options.m4 @@ -57,7 +57,7 @@ AH_TEMPLATE( HAVE_CRC32_HARDWARE, [Define to 1 to configure CRC32 hardware support.]) AC_MSG_CHECKING(if --enable-crc32-hardware option specified) AC_ARG_ENABLE(crc32-hardware, - AC_HELP_STRING([--enable-crc32-hardware], + AS_HELP_STRING([--enable-crc32-hardware], [Enable CRC32 hardware support.]), r=$enableval, r=yes) case "$r" in no) wt_cv_enable_crc32_hardware=no;; diff --git a/lang/python/Makefile.am b/lang/python/Makefile.am index 03c65a57028..b32d0321194 100644 --- a/lang/python/Makefile.am +++ b/lang/python/Makefile.am @@ -17,7 +17,8 @@ install-exec-local: (cd $(PYSRC) && \ $(PYTHON) setup.py build_py -d $(abs_builddir)/build && \ $(PYTHON) setup.py build_ext -f -b $(abs_builddir)/build $(PYDIRS) && \ - $(PYTHON) setup.py install_lib -b $(abs_builddir)/build --skip-build $(PYTHON_INSTALL_ARG)) + $(PYTHON) setup.py install_lib -b $(abs_builddir)/build --skip-build $(PYTHON_INSTALL_ARG) && \ + rm -rf $(abs_builddir)/build) # We build in different places for an install vs running from the tree: # clean up both. 
Don't rely on "setup.py clean" -- everything that should diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c index 6264de89df9..7ace6d49cf0 100644 --- a/src/cursor/cur_std.c +++ b/src/cursor/cur_std.c @@ -144,6 +144,7 @@ __wt_cursor_set_notsup(WT_CURSOR *cursor) */ int __wt_cursor_kv_not_set(WT_CURSOR *cursor, bool key) + WT_GCC_FUNC_ATTRIBUTE((cold)) { WT_SESSION_IMPL *session; diff --git a/src/include/extern.h b/src/include/extern.h index 4824dc93d96..2fb92c5faf0 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -315,7 +315,7 @@ extern int __wt_cursor_equals_notsup(WT_CURSOR *cursor, WT_CURSOR *other, int *e extern int __wt_cursor_search_near_notsup(WT_CURSOR *cursor, int *exact) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_cursor_reconfigure_notsup(WT_CURSOR *cursor, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_cursor_set_notsup(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_kv_not_set(WT_CURSOR *cursor, bool key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_cursor_kv_not_set(WT_CURSOR *cursor, bool key) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_cursor_get_key(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_cursor_set_key(WT_CURSOR *cursor, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_cursor_get_raw_key(WT_CURSOR *cursor, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -- cgit v1.2.1 From 3eaa4ea8d458f1a57d3aac916e2bc8a59450af97 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Fri, 23 Dec 2016 15:03:07 +1100 Subject: WT-3086 Add information about transaction state to cache stuck diagnostics (#3214) --- src/evict/evict_lru.c | 188 ++++++++++++++++++++++++++++++++++++++++++-------- src/include/extern.h | 2 +- 2 files changed, 159 insertions(+), 31 deletions(-) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 6fa728916de..0a2a9d28402 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -365,7 +365,7 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work) ret = ETIMEDOUT; __wt_err(session, ret, "Cache stuck for too long, giving up"); - WT_TRET(__wt_cache_dump(session, NULL)); + WT_TRET(__wt_dump_stuck_info(session, NULL)); return (ret); } #endif @@ -1974,15 +1974,116 @@ __wt_evict_priority_clear(WT_SESSION_IMPL *session) } #ifdef HAVE_DIAGNOSTIC +static int __dump_txn_state(WT_SESSION_IMPL *, FILE *fp); +static int __dump_cache(WT_SESSION_IMPL *, FILE *fp); /* - * __wt_cache_dump -- - * Dump debugging information to a file (default stderr) about the size of - * the files in the cache. + * __dump_txn_state -- + * Output debugging information about the global transaction state. */ int -__wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) +__dump_txn_state(WT_SESSION_IMPL *session, FILE *fp) +{ + WT_CONNECTION_IMPL *conn; + WT_TXN_GLOBAL *txn_global; + WT_TXN *txn; + WT_TXN_STATE *s; + const char *iso_tag; + uint64_t id; + uint32_t i, session_cnt; + + conn = S2C(session); + txn_global = &conn->txn_global; + WT_ORDERED_READ(session_cnt, conn->session_cnt); + + /* Note: odd string concatenation avoids spelling errors. 
*/ + if (fprintf(fp, "==========\n" "transaction state dump\n") < 0) + return (EIO); + + if (fprintf(fp, + "current ID: %" PRIu64 "\n" + "last running ID: %" PRIu64 "\n" + "oldest ID: %" PRIu64 "\n" + "oldest named snapshot ID: %" PRIu64 "\n", + txn_global->current, txn_global->last_running, + txn_global->oldest_id, txn_global->nsnap_oldest_id) < 0) + return (EIO); + + if (fprintf(fp, + "checkpoint running? %s\n" + "checkpoint generation: %" PRIu64 "\n" + "checkpoint pinned ID: %" PRIu64 "\n" + "checkpoint txn ID: %" PRIu64 "\n" + "session count: %" PRIu32 "\n", + txn_global->checkpoint_running ? "yes" : "no", + txn_global->checkpoint_gen, + txn_global->checkpoint_pinned, + txn_global->checkpoint_txnid, + session_cnt) < 0) + return (EIO); + + if (fprintf(fp, "Dumping transaction state of active sessions\n") < 0) + return (EIO); + + /* + * Walk each session transaction state and dump information. Accessing + * the content of session handles is not thread safe, so some + * information may change while traversing if other threads are active + * at the same time, which is OK since this is diagnostic code. 
+ */ + for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) { + /* Skip sessions with no active transaction */ + if ((id = s->id) == WT_TXN_NONE && s->pinned_id == WT_TXN_NONE) + continue; + + txn = &conn->sessions[i].txn; + switch (txn->isolation) { + case WT_ISO_READ_COMMITTED: + iso_tag = "WT_ISO_READ_COMMITTED"; + break; + case WT_ISO_READ_UNCOMMITTED: + iso_tag = "WT_ISO_READ_UNCOMMITTED"; + break; + case WT_ISO_SNAPSHOT: + iso_tag = "WT_ISO_SNAPSHOT"; + break; + default: + iso_tag = "INVALID"; + break; + } + + if (fprintf(fp, + "ID: %6" PRIu64 + ", mod count: %u" + ", pinned ID: %" PRIu64 + ", snap min: %" PRIu64 + ", snap max: %" PRIu64 + ", metadata pinned ID: %" PRIu64 + ", flags: 0x%08" PRIx32 + ", name: %s" + ", isolation: %s" "\n", + id, + txn->mod_count, + s->pinned_id, + txn->snap_min, + txn->snap_max, + s->metadata_pinned, + txn->flags, + conn->sessions[i].name == NULL ? + "EMPTY" : conn->sessions[i].name, + iso_tag) < 0) + return (EIO); + } + + return (0); +} + +/* + * __dump_cache -- + * Output debugging information about the size of the files in cache. + */ +int +__dump_cache(WT_SESSION_IMPL *session, FILE *fp) { - FILE *fp; WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle, *saved_dhandle; WT_PAGE *page; @@ -1997,13 +2098,9 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) conn = S2C(session); total_bytes = total_dirty_bytes = 0; - if (ofile == NULL) - fp = stderr; - else if ((fp = fopen(ofile, "w")) == NULL) - return (EIO); - /* Note: odd string concatenation avoids spelling errors. 
*/ - (void)fprintf(fp, "==========\n" "cache dump\n"); + if (fprintf(fp, "==========\n" "cache dump\n") < 0) + return (EIO); saved_dhandle = session->dhandle; TAILQ_FOREACH(dhandle, &conn->dhqh, q) { @@ -2048,13 +2145,17 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) } session->dhandle = NULL; - if (dhandle->checkpoint == NULL) - (void)fprintf(fp, "%s(): \n", dhandle->name); - else - (void)fprintf(fp, "%s(checkpoint=%s): \n", - dhandle->name, dhandle->checkpoint); - if (intl_pages != 0) - (void)fprintf(fp, + if (dhandle->checkpoint == NULL) { + if (fprintf(fp, + "%s(): \n", dhandle->name) < 0) + return (EIO); + } else { + if (fprintf(fp, "%s(checkpoint=%s): \n", + dhandle->name, dhandle->checkpoint) < 0) + return (EIO); + } + if (intl_pages != 0) { + if (fprintf(fp, "\t" "internal: " "%" PRIu64 " pages, " "%" PRIu64 "MB, " @@ -2069,9 +2170,11 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) (intl_bytes - intl_dirty_bytes) >> 20, intl_dirty_bytes >> 20, intl_bytes_max >> 20, - intl_dirty_bytes_max >> 20); - if (leaf_pages != 0) - (void)fprintf(fp, + intl_dirty_bytes_max >> 20) < 0) + return (EIO); + } + if (leaf_pages != 0) { + if (fprintf(fp, "\t" "leaf: " "%" PRIu64 " pages, " "%" PRIu64 "MB, " @@ -2086,7 +2189,9 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) (leaf_bytes - leaf_dirty_bytes) >> 20, leaf_dirty_bytes >> 20, leaf_bytes_max >> 20, - leaf_dirty_bytes_max >> 20); + leaf_dirty_bytes_max >> 20) < 0) + return (EIO); + } total_bytes += intl_bytes + leaf_bytes; total_dirty_bytes += intl_dirty_bytes + leaf_dirty_bytes; @@ -2099,16 +2204,39 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) */ total_bytes = __wt_cache_bytes_plus_overhead(conn->cache, total_bytes); - (void)fprintf(fp, + if (fprintf(fp, "cache dump: " - "total found = %" PRIu64 "MB vs tracked inuse %" PRIu64 "MB\n" - "total dirty bytes = %" PRIu64 "MB\n", + "total found: %" PRIu64 "MB vs tracked inuse %" PRIu64 "MB\n" + "total dirty 
bytes: %" PRIu64 "MB\n", total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20, - total_dirty_bytes >> 20); - (void)fprintf(fp, "==========\n"); - - if (ofile != NULL && fclose(fp) != 0) + total_dirty_bytes >> 20) < 0) return (EIO); + if (fprintf(fp, "==========\n") < 0) + return (EIO); + return (0); } + +/* + * __wt_dump_stuck_info -- + * Dump debugging information to a file (default stderr) about the state + * of WiredTiger when we have determined that the cache is stuck full. + */ +int +__wt_dump_stuck_info(WT_SESSION_IMPL *session, const char *ofile) +{ + FILE *fp; + WT_DECL_RET; + + if (ofile == NULL) + fp = stderr; + else if ((fp = fopen(ofile, "w")) == NULL) + return (EIO); + + WT_ERR(__dump_txn_state(session, fp)); + WT_ERR(__dump_cache(session, fp)); +err: if (ofile != NULL && fclose(fp) != 0) + return (EIO); + return (ret); +} #endif diff --git a/src/include/extern.h b/src/include/extern.h index 2fb92c5faf0..bb7fbddcae5 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -352,7 +352,7 @@ extern int __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int extern bool __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_dump_stuck_info(WT_SESSION_IMPL *session, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -- cgit v1.2.1 From 9216a5b64ec51bc1e381b96fe85345915d8fcaeb Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 22 Dec 2016 23:12:30 -0500 Subject: WT-3093 Reduce the size of WT_PAGE. (#3212) * Inline read/write locks in their structures the same way we handle spinlocks. WiredTiger currently has no need for a separate allocation, that was left over from the original POSIX pthread implementation. * Remove the lock name field from the read/write lock structure, shrinking the lock from 16B to 8B, the name field was never used, and it should be easy to identify the read/write lock's purpose from the enclosing structure. This means we no longer need two separate structures (the lock and the lock plus name), which simplifies the actual implementation. * Reduce the WT_PAGE size by pushing all of the variable-length column-store RLE array off-page into a separate allocation (instead of just the array itself), and moving the number-of-entries for the leaf pages out of the per page-type union. The latter change simplifies a bunch of stuff, row-store and fixed-length column-store no longer require a structure in the union at all, and lots of the #define's to handle that go away. * Move WT_ITEM.flags to the end of the structure, there's no reason to leave it in the middle anymore, and it's stylistically odd. 
--- src/block/block_write.c | 4 +- src/btree/bt_curnext.c | 4 +- src/btree/bt_curprev.c | 8 +-- src/btree/bt_cursor.c | 15 +++--- src/btree/bt_debug.c | 6 +-- src/btree/bt_delete.c | 7 ++- src/btree/bt_discard.c | 9 ++-- src/btree/bt_handle.c | 3 +- src/btree/bt_ovfl.c | 8 +-- src/btree/bt_page.c | 28 ++++++----- src/btree/bt_rebalance.c | 2 +- src/btree/bt_ret.c | 4 +- src/btree/bt_slvg.c | 26 +++++----- src/btree/bt_split.c | 6 +-- src/btree/bt_stat.c | 3 +- src/btree/bt_vrfy.c | 8 +-- src/btree/col_modify.c | 5 +- src/btree/col_srch.c | 7 ++- src/btree/row_key.c | 12 ++--- src/btree/row_modify.c | 9 ++-- src/btree/row_srch.c | 24 ++++----- src/conn/conn_dhandle.c | 2 +- src/conn/conn_handle.c | 3 +- src/conn/conn_log.c | 26 +++++----- src/conn/conn_sweep.c | 8 +-- src/cursor/cur_backup.c | 16 +++--- src/cursor/cur_log.c | 4 +- src/evict/evict_lru.c | 2 +- src/include/btmem.h | 110 +++++++++++++++++++++--------------------- src/include/btree.h | 2 +- src/include/btree.i | 6 +-- src/include/column.i | 23 ++++----- src/include/connection.h | 2 +- src/include/cursor.h | 4 +- src/include/dhandle.h | 2 +- src/include/extern.h | 20 ++++---- src/include/log.h | 2 +- src/include/lsm.h | 2 +- src/include/mutex.h | 18 ++----- src/include/thread_group.h | 2 +- src/include/txn.h | 4 +- src/include/verify_build.h | 1 - src/include/wiredtiger.in | 12 ++--- src/include/wt_internal.h | 6 ++- src/log/log.c | 12 ++--- src/lsm/lsm_cursor.c | 2 +- src/lsm/lsm_tree.c | 10 ++-- src/reconcile/rec_track.c | 8 +-- src/reconcile/rec_write.c | 9 ++-- src/schema/schema_util.c | 6 +-- src/session/session_api.c | 4 +- src/session/session_dhandle.c | 21 ++++---- src/support/mtx_rw.c | 93 +++++++++++------------------------ src/support/thread_group.c | 26 +++++----- src/txn/txn.c | 22 ++++----- src/txn/txn_ckpt.c | 10 ++-- src/txn/txn_nsnap.c | 12 ++--- 57 files changed, 328 insertions(+), 382 deletions(-) diff --git a/src/block/block_write.c b/src/block/block_write.c index 
d08aba45920..ea7859d6a38 100644 --- a/src/block/block_write.c +++ b/src/block/block_write.c @@ -43,10 +43,10 @@ __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len) * more targeted solution at some point. */ if (!conn->hot_backup) { - __wt_readlock(session, conn->hot_backup_lock); + __wt_readlock(session, &conn->hot_backup_lock); if (!conn->hot_backup) ret = __wt_ftruncate(session, block->fh, len); - __wt_readunlock(session, conn->hot_backup_lock); + __wt_readunlock(session, &conn->hot_backup_lock); } /* diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c index 4d3976f9647..ba5fceae7c7 100644 --- a/src/btree/bt_curnext.c +++ b/src/btree/bt_curnext.c @@ -338,7 +338,7 @@ new_insert: if ((ins = cbt->ins) != NULL) { } /* Check for the end of the page. */ - if (cbt->row_iteration_slot >= page->pg_row_entries * 2 + 1) + if (cbt->row_iteration_slot >= page->entries * 2 + 1) return (WT_NOTFOUND); ++cbt->row_iteration_slot; @@ -356,7 +356,7 @@ new_insert: if ((ins = cbt->ins) != NULL) { cbt->ins = NULL; cbt->slot = cbt->row_iteration_slot / 2 - 1; - rip = &page->pg_row_d[cbt->slot]; + rip = &page->pg_row[cbt->slot]; upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip)); if (upd != NULL && WT_UPDATE_DELETED_ISSET(upd)) { if (__wt_txn_visible_all(session, upd->txnid)) diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c index 2dd443ffac1..602c01b60eb 100644 --- a/src/btree/bt_curprev.c +++ b/src/btree/bt_curprev.c @@ -458,13 +458,13 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage) if (!F_ISSET_ATOMIC(page, WT_PAGE_BUILD_KEYS)) WT_RET(__wt_row_leaf_keys(session, page)); - if (page->pg_row_entries == 0) + if (page->entries == 0) cbt->ins_head = WT_ROW_INSERT_SMALLEST(page); else cbt->ins_head = - WT_ROW_INSERT_SLOT(page, page->pg_row_entries - 1); + WT_ROW_INSERT_SLOT(page, page->entries - 1); cbt->ins = WT_SKIP_LAST(cbt->ins_head); - cbt->row_iteration_slot = page->pg_row_entries * 2 + 1; + cbt->row_iteration_slot = 
page->entries * 2 + 1; cbt->rip_saved = NULL; goto new_insert; } @@ -515,7 +515,7 @@ new_insert: if ((ins = cbt->ins) != NULL) { cbt->ins = NULL; cbt->slot = cbt->row_iteration_slot / 2 - 1; - rip = &page->pg_row_d[cbt->slot]; + rip = &page->pg_row[cbt->slot]; upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip)); if (upd != NULL && WT_UPDATE_DELETED_ISSET(upd)) { if (__wt_txn_visible_all(session, upd->txnid)) diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 650289f2cd8..d18b9b76992 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -163,7 +163,7 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) * column-store pages don't have slots, but map one-to-one to * keys, check for retrieval past the end of the page. */ - if (cbt->recno >= cbt->ref->ref_recno + page->pg_fix_entries) + if (cbt->recno >= cbt->ref->ref_recno + page->entries) return (false); /* @@ -173,9 +173,9 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) break; case BTREE_COL_VAR: /* The search function doesn't check for empty pages. */ - if (page->pg_var_entries == 0) + if (page->entries == 0) return (false); - WT_ASSERT(session, cbt->slot < page->pg_var_entries); + WT_ASSERT(session, cbt->slot < page->entries); /* * Column-store updates are stored as "insert" objects. If @@ -191,16 +191,16 @@ __cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) * backing store; check the cell for a record already deleted * when read. */ - cip = &page->pg_var_d[cbt->slot]; + cip = &page->pg_var[cbt->slot]; if ((cell = WT_COL_PTR(page, cip)) == NULL || __wt_cell_type(cell) == WT_CELL_DEL) return (false); break; case BTREE_ROW: /* The search function doesn't check for empty pages. 
*/ - if (page->pg_row_entries == 0) + if (page->entries == 0) return (false); - WT_ASSERT(session, cbt->slot < page->pg_row_entries); + WT_ASSERT(session, cbt->slot < page->entries); /* * See above: for row-store, no insert object can have the same @@ -418,8 +418,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) * might be legitimately positioned after the last page slot). * Ignore those cases, it makes things too complicated. */ - if (cbt->slot != 0 && - cbt->slot != cbt->ref->page->pg_row_entries - 1) + if (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1) valid = __cursor_valid(cbt, &upd); } if (!valid) { diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index d507cc0e396..957ccdbea1a 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -662,18 +662,18 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref) break; case WT_PAGE_COL_FIX: WT_RET(ds->f(ds, " recno %" PRIu64, ref->ref_recno)); - entries = page->pg_fix_entries; + entries = page->entries; break; case WT_PAGE_COL_VAR: WT_RET(ds->f(ds, " recno %" PRIu64, ref->ref_recno)); - entries = page->pg_var_entries; + entries = page->entries; break; case WT_PAGE_ROW_INT: WT_INTL_INDEX_GET(session, page, pindex); entries = pindex->entries; break; case WT_PAGE_ROW_LEAF: - entries = page->pg_row_entries; + entries = page->entries; break; WT_ILLEGAL_VALUE(session); } diff --git a/src/btree/bt_delete.c b/src/btree/bt_delete.c index 00e41475de9..b55ad291c5e 100644 --- a/src/btree/bt_delete.c +++ b/src/btree/bt_delete.c @@ -318,13 +318,12 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) * hard case is if a page splits: the update structures might be moved * to different pages, and we still have to find them all for an abort. */ - if (page_del != NULL) WT_RET(__wt_calloc_def( - session, page->pg_row_entries + 1, &page_del->update_list)); + session, page->entries + 1, &page_del->update_list)); /* Allocate the per-page update array. 
*/ - WT_ERR(__wt_calloc_def(session, page->pg_row_entries, &upd_array)); + WT_ERR(__wt_calloc_def(session, page->entries, &upd_array)); page->modify->mod_row_update = upd_array; /* @@ -332,7 +331,7 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) * structures, fill in the per-page update array with references to * deleted items. */ - for (i = 0, size = 0; i < page->pg_row_entries; ++i) { + for (i = 0, size = 0; i < page->entries; ++i) { WT_ERR(__wt_calloc_one(session, &upd)); WT_UPDATE_DELETED_SET(upd); diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index c2733d6567b..d2beb84fee9 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -206,8 +206,7 @@ __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page) if (mod->mod_col_update != NULL) __free_skip_array(session, mod->mod_col_update, page->type == - WT_PAGE_COL_FIX ? 1 : page->pg_var_entries, - update_ignore); + WT_PAGE_COL_FIX ? 1 : page->entries, update_ignore); break; case WT_PAGE_ROW_LEAF: /* @@ -219,12 +218,12 @@ __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page) */ if (mod->mod_row_insert != NULL) __free_skip_array(session, mod->mod_row_insert, - page->pg_row_entries + 1, update_ignore); + page->entries + 1, update_ignore); /* Free the update array. */ if (mod->mod_row_update != NULL) __free_update(session, mod->mod_row_update, - page->pg_row_entries, update_ignore); + page->entries, update_ignore); break; } @@ -332,7 +331,7 @@ static void __free_page_col_var(WT_SESSION_IMPL *session, WT_PAGE *page) { /* Free the RLE lookup array. */ - __wt_free(session, page->pg_var_repeats); + __wt_free(session, page->u.col_var.repeats); } /* diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 47c7972dd57..6ed70788759 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -359,8 +359,7 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) } /* Initialize locks. 
*/ - WT_RET(__wt_rwlock_alloc( - session, &btree->ovfl_lock, "btree overflow lock")); + __wt_rwlock_init(session, &btree->ovfl_lock); WT_RET(__wt_spin_init(session, &btree->flush_lock, "btree flush")); btree->checkpointing = WT_CKPT_OFF; /* Not checkpointing */ diff --git a/src/btree/bt_ovfl.c b/src/btree/bt_ovfl.c index 29ea561db3a..ae0da62af57 100644 --- a/src/btree/bt_ovfl.c +++ b/src/btree/bt_ovfl.c @@ -67,11 +67,11 @@ __wt_ovfl_read(WT_SESSION_IMPL *session, * Acquire the overflow lock, and retest the on-page cell's value inside * the lock. */ - __wt_readlock(session, S2BT(session)->ovfl_lock); + __wt_readlock(session, &S2BT(session)->ovfl_lock); ret = __wt_cell_type_raw(unpack->cell) == WT_CELL_VALUE_OVFL_RM ? __wt_ovfl_txnc_search(page, unpack->data, unpack->size, store) : __ovfl_read(session, unpack->data, unpack->size, store); - __wt_readunlock(session, S2BT(session)->ovfl_lock); + __wt_readunlock(session, &S2BT(session)->ovfl_lock); return (ret); } @@ -249,7 +249,7 @@ __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell) * Acquire the overflow lock to avoid racing with a thread reading the * backing overflow blocks. */ - __wt_writelock(session, btree->ovfl_lock); + __wt_writelock(session, &btree->ovfl_lock); switch (unpack->raw) { case WT_CELL_KEY_OVFL: @@ -263,7 +263,7 @@ __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell) WT_ILLEGAL_VALUE(session); } - __wt_writeunlock(session, btree->ovfl_lock); + __wt_writeunlock(session, &btree->ovfl_lock); /* Free the backing disk blocks. 
*/ return (bm->free(bm, session, unpack->data, unpack->size)); diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c index 7bac7079fe8..f20f6398e37 100644 --- a/src/btree/bt_page.c +++ b/src/btree/bt_page.c @@ -67,7 +67,7 @@ __wt_page_alloc(WT_SESSION_IMPL *session, switch (type) { case WT_PAGE_COL_FIX: - page->pg_fix_entries = alloc_entries; + page->entries = alloc_entries; break; case WT_PAGE_COL_INT: case WT_PAGE_ROW_INT: @@ -102,12 +102,12 @@ err: if ((pindex = WT_INTL_INDEX_GET_SAFE(page)) != NULL) { } break; case WT_PAGE_COL_VAR: - page->pg_var_d = (WT_COL *)((uint8_t *)page + sizeof(WT_PAGE)); - page->pg_var_entries = alloc_entries; + page->pg_var = (WT_COL *)((uint8_t *)page + sizeof(WT_PAGE)); + page->entries = alloc_entries; break; case WT_PAGE_ROW_LEAF: - page->pg_row_d = (WT_ROW *)((uint8_t *)page + sizeof(WT_PAGE)); - page->pg_row_entries = alloc_entries; + page->pg_row = (WT_ROW *)((uint8_t *)page + sizeof(WT_PAGE)); + page->entries = alloc_entries; break; WT_ILLEGAL_VALUE(session); } @@ -333,9 +333,10 @@ __inmem_col_var( WT_CELL *cell; WT_CELL_UNPACK *unpack, _unpack; const WT_PAGE_HEADER *dsk; + size_t size; uint64_t rle; - size_t bytes_allocated; uint32_t i, indx, n, repeat_off; + void *p; btree = S2BT(session); dsk = page->dsk; @@ -343,7 +344,6 @@ __inmem_col_var( repeats = NULL; repeat_off = 0; unpack = &_unpack; - bytes_allocated = 0; /* * Walk the page, building references: the page contains unsorted value @@ -351,7 +351,7 @@ __inmem_col_var( * (WT_CELL_VALUE_OVFL) or deleted items (WT_CELL_DEL). 
*/ indx = 0; - cip = page->pg_var_d; + cip = page->pg_var; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { __wt_cell_unpack(cell, unpack); WT_COL_PTR_SET(cip, WT_PAGE_DISK_OFFSET(page, cell)); @@ -367,12 +367,14 @@ __inmem_col_var( if (rle > 1) { if (repeats == NULL) { __inmem_col_var_repeats(session, page, &n); - WT_RET(__wt_realloc_def(session, - &bytes_allocated, n + 1, &repeats)); + size = sizeof(WT_COL_VAR_REPEAT) + + (n + 1) * sizeof(WT_COL_RLE); + WT_RET(__wt_calloc(session, 1, size, &p)); + *sizep += size; - page->pg_var_repeats = repeats; + page->u.col_var.repeats = p; page->pg_var_nrepeats = n; - *sizep += bytes_allocated; + repeats = page->pg_var_repeats; } repeats[repeat_off].indx = indx; repeats[repeat_off].recno = recno; @@ -569,7 +571,7 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page) unpack = &_unpack; /* Walk the page, building indices. */ - rip = page->pg_row_d; + rip = page->pg_row; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { __wt_cell_unpack(cell, unpack); switch (unpack->type) { diff --git a/src/btree/bt_rebalance.c b/src/btree/bt_rebalance.c index 29380459b94..24b4f7bb33d 100644 --- a/src/btree/bt_rebalance.c +++ b/src/btree/bt_rebalance.c @@ -265,7 +265,7 @@ __rebalance_row_leaf_key(WT_SESSION_IMPL *session, */ WT_RET(__wt_bt_read(session, rs->tmp1, addr, addr_len)); WT_RET(__wt_page_inmem(session, NULL, rs->tmp1->data, 0, 0, &page)); - ret = __wt_row_leaf_key_copy(session, page, &page->pg_row_d[0], key); + ret = __wt_row_leaf_key_copy(session, page, &page->pg_row[0], key); __wt_page_out(session, &page); return (ret); } diff --git a/src/btree/bt_ret.c b/src/btree/bt_ret.c index 8ef2db67e7b..6409a1a180c 100644 --- a/src/btree/bt_ret.c +++ b/src/btree/bt_ret.c @@ -64,10 +64,10 @@ __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) } /* Take the value from the original page cell. 
*/ - cell = WT_COL_PTR(page, &page->pg_var_d[cbt->slot]); + cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]); break; case WT_PAGE_ROW_LEAF: - rip = &page->pg_row_d[cbt->slot]; + rip = &page->pg_row[cbt->slot]; /* * If the cursor references a WT_INSERT item, take its key. diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c index fde4d4fb9de..a8243eba17f 100644 --- a/src/btree/bt_slvg.c +++ b/src/btree/bt_slvg.c @@ -603,9 +603,9 @@ __slvg_trk_leaf(WT_SESSION_IMPL *session, */ WT_ERR(__wt_page_inmem(session, NULL, dsk, 0, 0, &page)); WT_ERR(__wt_row_leaf_key_copy(session, - page, &page->pg_row_d[0], &trk->row_start)); - WT_ERR(__wt_row_leaf_key_copy(session, page, - &page->pg_row_d[page->pg_row_entries - 1], &trk->row_stop)); + page, &page->pg_row[0], &trk->row_start)); + WT_ERR(__wt_row_leaf_key_copy(session, + page, &page->pg_row[page->entries - 1], &trk->row_stop)); __wt_verbose(session, WT_VERB_SALVAGE, "%s start key %s", @@ -1244,10 +1244,10 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref) WT_RET(__wt_page_in(session, ref, 0)); page = ref->page; - entriesp = page->type == WT_PAGE_COL_VAR ? - &page->pg_var_entries : &page->pg_fix_entries; + entriesp = + page->type == WT_PAGE_COL_VAR ? &page->entries : &page->entries; - save_col_var = page->pg_var_d; + save_col_var = page->pg_var; save_entries = *entriesp; /* @@ -1303,7 +1303,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref) WT_ERR(__wt_reconcile(session, ref, cookie, WT_VISIBILITY_ERR, NULL)); /* Reset the page. */ - page->pg_var_d = save_col_var; + page->pg_var = save_col_var; *entriesp = save_entries; ret = __wt_page_release(session, ref, 0); @@ -1973,14 +1973,14 @@ __slvg_row_build_leaf( /* We should have selected some entries, but not the entire page. 
*/ WT_ASSERT(session, skip_start + skip_stop > 0 && - skip_start + skip_stop < page->pg_row_entries); + skip_start + skip_stop < page->entries); /* * Take a copy of this page's first key to define the start of * its range. The key may require processing, otherwise, it's * a copy from the page. */ - rip = page->pg_row_d + skip_start; + rip = page->pg_row + skip_start; WT_ERR(__wt_row_leaf_key(session, page, rip, key, false)); WT_ERR(__wt_row_ikey_incr( session, ref->home, 0, key->data, key->size, ref)); @@ -1988,14 +1988,14 @@ __slvg_row_build_leaf( /* Set the referenced flag on overflow pages we're using. */ if (trk->trk_ovfl_cnt != 0) WT_ERR(__slvg_row_ovfl(session, - trk, page, skip_start, page->pg_row_entries - skip_stop)); + trk, page, skip_start, page->entries - skip_stop)); /* * Change the page to reflect the correct record count: there is no * need to copy anything on the page itself, the entries value limits * the number of page items. */ - page->pg_row_entries -= skip_stop; + page->entries -= skip_stop; cookie->skip = skip_start; /* @@ -2014,7 +2014,7 @@ __slvg_row_build_leaf( WT_ERR(__wt_reconcile(session, ref, cookie, WT_VISIBILITY_ERR, NULL)); /* Reset the page. */ - page->pg_row_entries += skip_stop; + page->entries += skip_stop; /* * Discard our hazard pointer and evict the page, updating the @@ -2081,7 +2081,7 @@ __slvg_row_ovfl(WT_SESSION_IMPL *session, * We're merging a row-store page, and we took some number of records, * figure out which (if any) overflow records we used. 
*/ - for (rip = page->pg_row_d + start; start < stop; ++start, ++rip) { + for (rip = page->pg_row + start; start < stop; ++start, ++rip) { copy = WT_ROW_KEY_COPY(rip); (void)__wt_row_leaf_key_info( page, copy, NULL, &cell, NULL, NULL); diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index fe49f937719..6b0b8a08c02 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -1770,9 +1770,9 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) /* Find the last item on the page. */ if (type == WT_PAGE_ROW_LEAF) - ins_head = page->pg_row_entries == 0 ? + ins_head = page->entries == 0 ? WT_ROW_INSERT_SMALLEST(page) : - WT_ROW_INSERT_SLOT(page, page->pg_row_entries - 1); + WT_ROW_INSERT_SLOT(page, page->entries - 1); else ins_head = WT_COL_APPEND(page); moved_ins = WT_SKIP_LAST(ins_head); @@ -1822,7 +1822,7 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) key->size = WT_INSERT_KEY_SIZE(ins); } else WT_ERR(__wt_row_leaf_key( - session, page, &page->pg_row_d[0], key, true)); + session, page, &page->pg_row[0], key, true)); WT_ERR(__wt_row_ikey(session, 0, key->data, key->size, child)); parent_incr += sizeof(WT_IKEY) + key->size; __wt_scr_free(session, &key); diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c index f4701a858d5..0da0e0807bd 100644 --- a/src/btree/bt_stat.c +++ b/src/btree/bt_stat.c @@ -106,8 +106,7 @@ __stat_page(WT_SESSION_IMPL *session, WT_PAGE *page, WT_DSRC_STATS **stats) switch (page->type) { case WT_PAGE_COL_FIX: WT_STAT_INCR(session, stats, btree_column_fix); - WT_STAT_INCRV( - session, stats, btree_entries, page->pg_fix_entries); + WT_STAT_INCRV(session, stats, btree_entries, page->entries); break; case WT_PAGE_COL_INT: WT_STAT_INCR(session, stats, btree_column_internal); diff --git a/src/btree/bt_vrfy.c b/src/btree/bt_vrfy.c index 340f9bb6f0e..05990918215 100644 --- a/src/btree/bt_vrfy.c +++ b/src/btree/bt_vrfy.c @@ -386,7 +386,7 @@ recno_chk: if (recno != vs->record_total + 1) } switch (page->type) { case WT_PAGE_COL_FIX: 
- vs->record_total += page->pg_fix_entries; + vs->record_total += page->entries; break; case WT_PAGE_COL_VAR: recno = 0; @@ -614,7 +614,7 @@ __verify_row_leaf_key_order( * If a tree is empty (just created), it won't have keys; if there * are no keys, we're done. */ - if (page->pg_row_entries == 0) + if (page->entries == 0) return (0); /* @@ -624,7 +624,7 @@ __verify_row_leaf_key_order( */ if (vs->max_addr->size != 0) { WT_RET(__wt_row_leaf_key_copy( - session, page, page->pg_row_d, vs->tmp1)); + session, page, page->pg_row, vs->tmp1)); /* * Compare the key against the largest key we've seen so far. @@ -653,7 +653,7 @@ __verify_row_leaf_key_order( /* Update the largest key we've seen to the last key on this page. */ WT_RET(__wt_row_leaf_key_copy(session, page, - page->pg_row_d + (page->pg_row_entries - 1), vs->max_key)); + page->pg_row + (page->entries - 1), vs->max_key)); (void)__wt_page_addr_string(session, ref, vs->max_addr); return (0); diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c index a7920da5267..9ccb9728189 100644 --- a/src/btree/col_modify.c +++ b/src/btree/col_modify.c @@ -115,9 +115,8 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, page, mod->mod_col_update, ins_headp, 1); ins_headp = &mod->mod_col_update[0]; } else { - WT_PAGE_ALLOC_AND_SWAP(session, - page, mod->mod_col_update, ins_headp, - page->pg_var_entries); + WT_PAGE_ALLOC_AND_SWAP(session, page, + mod->mod_col_update, ins_headp, page->entries); ins_headp = &mod->mod_col_update[cbt->slot]; } diff --git a/src/btree/col_srch.c b/src/btree/col_srch.c index 64ee9e94f4c..c72d66f8796 100644 --- a/src/btree/col_srch.c +++ b/src/btree/col_srch.c @@ -240,8 +240,8 @@ leaf_only: cbt->compare = 1; return (0); } - if (recno >= current->ref_recno + page->pg_fix_entries) { - cbt->recno = current->ref_recno + page->pg_fix_entries; + if (recno >= current->ref_recno + page->entries) { + cbt->recno = current->ref_recno + page->entries; goto past_end; } else { cbt->recno = recno; @@ 
-257,8 +257,7 @@ leaf_only: } if ((cip = __col_var_search(current, recno, NULL)) == NULL) { cbt->recno = __col_var_last_recno(current); - cbt->slot = page->pg_var_entries == 0 ? - 0 : page->pg_var_entries - 1; + cbt->slot = page->entries == 0 ? 0 : page->entries - 1; goto past_end; } else { cbt->recno = recno; diff --git a/src/btree/row_key.c b/src/btree/row_key.c index 99ee34a6c5d..032fdf7d897 100644 --- a/src/btree/row_key.c +++ b/src/btree/row_key.c @@ -26,7 +26,7 @@ __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) btree = S2BT(session); - if (page->pg_row_entries == 0) { /* Just checking... */ + if (page->entries == 0) { /* Just checking... */ F_SET_ATOMIC(page, WT_PAGE_BUILD_KEYS); return (0); } @@ -51,15 +51,15 @@ __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) */ WT_RET(__wt_scr_alloc(session, 0, &key)); WT_RET(__wt_scr_alloc(session, - (uint32_t)__bitstr_size(page->pg_row_entries), &tmp)); + (uint32_t)__bitstr_size(page->entries), &tmp)); memset(tmp->mem, 0, tmp->memsize); if ((gap = btree->key_gap) == 0) gap = 1; - __inmem_row_leaf_slots(tmp->mem, 0, page->pg_row_entries, gap); + __inmem_row_leaf_slots(tmp->mem, 0, page->entries, gap); /* Instantiate the keys. */ - for (rip = page->pg_row_d, i = 0; i < page->pg_row_entries; ++rip, ++i) + for (rip = page->pg_row, i = 0; i < page->entries; ++rip, ++i) if (__bit_test(tmp->mem, i)) WT_ERR(__wt_row_leaf_key_work( session, page, rip, key, true)); @@ -282,7 +282,7 @@ switch_and_jump: /* Switching to a forward roll. */ * the tracking cache. */ if (slot_offset == 0) { - __wt_readlock(session, btree->ovfl_lock); + __wt_readlock(session, &btree->ovfl_lock); copy = WT_ROW_KEY_COPY(rip); if (!__wt_row_leaf_key_info(page, copy, NULL, &cell, &keyb->data, &keyb->size)) { @@ -290,7 +290,7 @@ switch_and_jump: /* Switching to a forward roll. 
*/ ret = __wt_dsk_cell_data_ref(session, WT_PAGE_ROW_LEAF, unpack, keyb); } - __wt_readunlock(session, btree->ovfl_lock); + __wt_readunlock(session, &btree->ovfl_lock); WT_ERR(ret); break; } diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c index a1c214e5b8b..b1a81ca3d9f 100644 --- a/src/btree/row_modify.c +++ b/src/btree/row_modify.c @@ -85,9 +85,8 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, if (cbt->compare == 0) { if (cbt->ins == NULL) { /* Allocate an update array as necessary. */ - WT_PAGE_ALLOC_AND_SWAP(session, - page, mod->mod_row_update, - upd_entry, page->pg_row_entries); + WT_PAGE_ALLOC_AND_SWAP(session, page, + mod->mod_row_update, upd_entry, page->entries); /* Set the WT_UPDATE array reference. */ upd_entry = &mod->mod_row_update[cbt->slot]; @@ -147,10 +146,10 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, * slot. That's hard, so we set a flag. */ WT_PAGE_ALLOC_AND_SWAP(session, page, - mod->mod_row_insert, ins_headp, page->pg_row_entries + 1); + mod->mod_row_insert, ins_headp, page->entries + 1); ins_slot = F_ISSET(cbt, WT_CBT_SEARCH_SMALLEST) ? - page->pg_row_entries: cbt->slot; + page->entries: cbt->slot; ins_headp = &mod->mod_row_insert[ins_slot]; /* Allocate the WT_INSERT_HEAD structure as necessary. 
*/ diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c index d4e82c458d4..aa299a161da 100644 --- a/src/btree/row_srch.c +++ b/src/btree/row_srch.c @@ -486,14 +486,14 @@ leaf_only: if (insert && descend_right) { cbt->append_tree = 1; - if (page->pg_row_entries == 0) { - cbt->slot = WT_ROW_SLOT(page, page->pg_row_d); + if (page->entries == 0) { + cbt->slot = WT_ROW_SLOT(page, page->pg_row); F_SET(cbt, WT_CBT_SEARCH_SMALLEST); ins_head = WT_ROW_INSERT_SMALLEST(page); } else { cbt->slot = WT_ROW_SLOT(page, - page->pg_row_d + (page->pg_row_entries - 1)); + page->pg_row + (page->entries - 1)); ins_head = WT_ROW_INSERT_SLOT(page, cbt->slot); } @@ -511,11 +511,11 @@ leaf_only: * doing the tests and error handling inside the loop costs about 5%. */ base = 0; - limit = page->pg_row_entries; + limit = page->entries; if (collator == NULL && srch_key->size <= WT_COMPARE_SHORT_MAXLEN) for (; limit != 0; limit >>= 1) { indx = base + (limit >> 1); - rip = page->pg_row_d + indx; + rip = page->pg_row + indx; WT_ERR( __wt_row_leaf_key(session, page, rip, item, true)); @@ -529,7 +529,7 @@ leaf_only: else if (collator == NULL) for (; limit != 0; limit >>= 1) { indx = base + (limit >> 1); - rip = page->pg_row_d + indx; + rip = page->pg_row + indx; WT_ERR( __wt_row_leaf_key(session, page, rip, item, true)); @@ -547,7 +547,7 @@ leaf_only: else for (; limit != 0; limit >>= 1) { indx = base + (limit >> 1); - rip = page->pg_row_d + indx; + rip = page->pg_row + indx; WT_ERR( __wt_row_leaf_key(session, page, rip, item, true)); @@ -591,13 +591,13 @@ leaf_match: cbt->compare = 0; */ if (base == 0) { cbt->compare = 1; - cbt->slot = WT_ROW_SLOT(page, page->pg_row_d); + cbt->slot = WT_ROW_SLOT(page, page->pg_row); F_SET(cbt, WT_CBT_SEARCH_SMALLEST); ins_head = WT_ROW_INSERT_SMALLEST(page); } else { cbt->compare = -1; - cbt->slot = WT_ROW_SLOT(page, page->pg_row_d + (base - 1)); + cbt->slot = WT_ROW_SLOT(page, page->pg_row + (base - 1)); ins_head = WT_ROW_INSERT_SLOT(page, cbt->slot); } @@ 
-645,16 +645,16 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) __cursor_pos_clear(cbt); /* If the page has disk-based entries, select from them. */ - if (page->pg_row_entries != 0) { + if (page->entries != 0) { cbt->compare = 0; - cbt->slot = __wt_random(&session->rnd) % page->pg_row_entries; + cbt->slot = __wt_random(&session->rnd) % page->entries; /* * The real row-store search function builds the key, so we * have to as well. */ return (__wt_row_leaf_key(session, - page, page->pg_row_d + cbt->slot, cbt->tmp, false)); + page, page->pg_row + cbt->slot, cbt->tmp, false)); } /* diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index e9e3925c57e..b2f4bb04ce4 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -42,7 +42,7 @@ __conn_dhandle_alloc(WT_SESSION_IMPL *session, WT_RET(__wt_calloc_one(session, &dhandle)); - WT_ERR(__wt_rwlock_alloc(session, &dhandle->rwlock, "data handle")); + __wt_rwlock_init(session, &dhandle->rwlock); dhandle->name_hash = __wt_hash_city64(uri, strlen(uri)); WT_ERR(__wt_strdup(session, uri, &dhandle->name)); WT_ERR(__wt_strdup(session, checkpoint, &dhandle->checkpoint)); diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index 02182daa7dc..3f7fc9bb2a7 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -64,8 +64,7 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_RET(__wt_spin_init(session, &conn->turtle_lock, "turtle file")); /* Read-write locks */ - WT_RET(__wt_rwlock_alloc( - session, &conn->hot_backup_lock, "hot backup")); + __wt_rwlock_init(session, &conn->hot_backup_lock); WT_RET(__wt_calloc_def(session, WT_PAGE_LOCKS, &conn->page_lock)); for (i = 0; i < WT_PAGE_LOCKS; ++i) diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index 8198b3a1a02..8f8f8614ba8 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -237,7 +237,7 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file) * We can only archive files if a hot backup is not 
in progress or * if we are the backup. */ - __wt_readlock(session, conn->hot_backup_lock); + __wt_readlock(session, &conn->hot_backup_lock); locked = true; if (!conn->hot_backup || backup_file != 0) { for (i = 0; i < logcount; i++) { @@ -248,7 +248,7 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file) session, WT_LOG_FILENAME, lognum)); } } - __wt_readunlock(session, conn->hot_backup_lock); + __wt_readunlock(session, &conn->hot_backup_lock); locked = false; /* @@ -260,7 +260,7 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file) if (0) err: __wt_err(session, ret, "log archive server error"); if (locked) - __wt_readunlock(session, conn->hot_backup_lock); + __wt_readunlock(session, &conn->hot_backup_lock); WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount)); return (ret); } @@ -355,9 +355,9 @@ __wt_log_truncate_files( __wt_verbose(session, WT_VERB_LOG, "log_truncate_files: Archive once up to %" PRIu32, backup_file); - __wt_writelock(session, log->log_archive_lock); + __wt_writelock(session, &log->log_archive_lock); ret = __log_archive_once(session, backup_file); - __wt_writeunlock(session, log->log_archive_lock); + __wt_writeunlock(session, &log->log_archive_lock); return (ret); } @@ -433,7 +433,7 @@ __log_file_server(void *arg) */ if (!conn->hot_backup) { __wt_readlock( - session, conn->hot_backup_lock); + session, &conn->hot_backup_lock); if (!conn->hot_backup) WT_ERR_ERROR_OK( __wt_ftruncate(session, @@ -441,7 +441,7 @@ __log_file_server(void *arg) close_end_lsn.l.offset), ENOTSUP); __wt_readunlock( - session, conn->hot_backup_lock); + session, &conn->hot_backup_lock); } WT_SET_LSN(&close_end_lsn, close_end_lsn.l.file + 1, 0); @@ -814,10 +814,11 @@ __log_server(void *arg) * agreed not to rename or remove any files in * the database directory. 
*/ - __wt_readlock(session, conn->hot_backup_lock); + __wt_readlock(session, &conn->hot_backup_lock); if (!conn->hot_backup) ret = __log_prealloc_once(session); - __wt_readunlock(session, conn->hot_backup_lock); + __wt_readunlock( + session, &conn->hot_backup_lock); WT_ERR(ret); } @@ -826,10 +827,10 @@ __log_server(void *arg) */ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) { if (__wt_try_writelock( - session, log->log_archive_lock) == 0) { + session, &log->log_archive_lock) == 0) { ret = __log_archive_once(session, 0); __wt_writeunlock( - session, log->log_archive_lock); + session, &log->log_archive_lock); WT_ERR(ret); } else __wt_verbose(session, WT_VERB_LOG, @@ -884,8 +885,7 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_RET(__wt_spin_init(session, &log->log_sync_lock, "log sync")); WT_RET(__wt_spin_init(session, &log->log_writelsn_lock, "log write LSN")); - WT_RET(__wt_rwlock_alloc(session, - &log->log_archive_lock, "log archive lock")); + __wt_rwlock_init(session, &log->log_archive_lock); if (FLD_ISSET(conn->direct_io, WT_DIRECT_IO_LOG)) log->allocsize = (uint32_t) WT_MAX(conn->buffer_alignment, WT_LOG_ALIGN); diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index d1254d8afcc..7d5cb7d7c72 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -81,7 +81,7 @@ __sweep_expire_one(WT_SESSION_IMPL *session) * handle list lock so that connection-level handle searches * never need to retry. */ - WT_RET(__wt_try_writelock(session, dhandle->rwlock)); + WT_RET(__wt_try_writelock(session, &dhandle->rwlock)); /* Only sweep clean trees where all updates are visible. 
*/ if (btree->modified || @@ -95,7 +95,7 @@ __sweep_expire_one(WT_SESSION_IMPL *session) */ ret = __wt_conn_btree_sync_and_close(session, false, true); -err: __wt_writeunlock(session, dhandle->rwlock); +err: __wt_writeunlock(session, &dhandle->rwlock); return (ret); } @@ -188,7 +188,7 @@ __sweep_remove_one(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle) WT_DECL_RET; /* Try to get exclusive access. */ - WT_RET(__wt_try_writelock(session, dhandle->rwlock)); + WT_RET(__wt_try_writelock(session, &dhandle->rwlock)); /* * If there are no longer any references to the handle in any @@ -205,7 +205,7 @@ __sweep_remove_one(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle) * don't retry the discard until it times out again. */ if (ret != 0) { -err: __wt_writeunlock(session, dhandle->rwlock); +err: __wt_writeunlock(session, &dhandle->rwlock); } return (ret); diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c index 456aa2e0f02..08b15e6ca5e 100644 --- a/src/cursor/cur_backup.c +++ b/src/cursor/cur_backup.c @@ -230,10 +230,10 @@ __backup_start( * We are holding the checkpoint and schema locks so schema operations * will not see the backup file list until it is complete and valid. */ - __wt_writelock(session, conn->hot_backup_lock); + __wt_writelock(session, &conn->hot_backup_lock); conn->hot_backup = true; conn->hot_backup_list = NULL; - __wt_writeunlock(session, conn->hot_backup_lock); + __wt_writeunlock(session, &conn->hot_backup_lock); /* We're the lock holder, we own cleanup. */ F_SET(cb, WT_CURBACKUP_LOCKER); @@ -297,9 +297,9 @@ err: /* Close the hot backup file. 
*/ if (ret == 0) { WT_ASSERT(session, dest != NULL); WT_TRET(__wt_fs_rename(session, WT_BACKUP_TMP, dest, false)); - __wt_writelock(session, conn->hot_backup_lock); + __wt_writelock(session, &conn->hot_backup_lock); conn->hot_backup_list = cb->list; - __wt_writeunlock(session, conn->hot_backup_lock); + __wt_writeunlock(session, &conn->hot_backup_lock); } return (ret); @@ -319,9 +319,9 @@ __backup_stop(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb) conn = S2C(session); /* Release all btree names held by the backup. */ - __wt_writelock(session, conn->hot_backup_lock); + __wt_writelock(session, &conn->hot_backup_lock); conn->hot_backup_list = NULL; - __wt_writeunlock(session, conn->hot_backup_lock); + __wt_writeunlock(session, &conn->hot_backup_lock); if (cb->list != NULL) { for (i = 0; cb->list[i] != NULL; ++i) __wt_free(session, cb->list[i]); @@ -332,9 +332,9 @@ __backup_stop(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb) WT_TRET(__wt_backup_file_remove(session)); /* Checkpoint deletion can proceed, as can the next hot backup. */ - __wt_writelock(session, conn->hot_backup_lock); + __wt_writelock(session, &conn->hot_backup_lock); conn->hot_backup = false; - __wt_writeunlock(session, conn->hot_backup_lock); + __wt_writeunlock(session, &conn->hot_backup_lock); return (ret); } diff --git a/src/cursor/cur_log.c b/src/cursor/cur_log.c index 3ee6554b3c0..e5b56aa406f 100644 --- a/src/cursor/cur_log.c +++ b/src/cursor/cur_log.c @@ -305,7 +305,7 @@ __curlog_close(WT_CURSOR *cursor) WT_ASSERT(session, FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)); if (F_ISSET(cl, WT_CURLOG_ARCHIVE_LOCK)) - __wt_readunlock(session, conn->log->log_archive_lock); + __wt_readunlock(session, &conn->log->log_archive_lock); __wt_free(session, cl->cur_lsn); __wt_free(session, cl->next_lsn); @@ -383,7 +383,7 @@ __wt_curlog_open(WT_SESSION_IMPL *session, WT_ERR(__wt_log_force_write(session, 1, NULL)); /* Log cursors block archiving. 
*/ - __wt_readlock(session, log->log_archive_lock); + __wt_readlock(session, &log->log_archive_lock); F_SET(cl, WT_CURLOG_ARCHIVE_LOCK); if (0) { diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 0a2a9d28402..214b5c007cb 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -420,7 +420,7 @@ __wt_evict_destroy(WT_SESSION_IMPL *session) return (0); /* Wait for any eviction thread group changes to stabilize. */ - __wt_writelock(session, conn->evict_threads.lock); + __wt_writelock(session, &conn->evict_threads.lock); /* * Signal the threads to finish and stop populating the queue. diff --git a/src/include/btmem.h b/src/include/btmem.h index 9bd835f5d09..43c1a309d52 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -434,6 +434,19 @@ struct __wt_page_modify { uint8_t update_restored; /* Page created by restoring updates */ }; +/* + * WT_COL_RLE -- + * Variable-length column-store pages have an array of page entries with RLE + * counts greater than 1 when reading the page, so it's not necessary to walk + * the page counting records to find a specific entry. We can do a binary search + * in this array, then an offset calculation to find the cell. + */ +WT_PACKED_STRUCT_BEGIN(__wt_col_rle) + uint64_t recno; /* Record number of first repeat. */ + uint64_t rle; /* Repeat count. */ + uint32_t indx; /* Slot of entry in col_var. */ +WT_PACKED_STRUCT_END + /* * WT_PAGE -- * The WT_PAGE structure describes the in-memory page information. @@ -515,53 +528,54 @@ struct __wt_page { } while (0) /* Row-store leaf page. */ - struct { - WT_ROW *d; /* Key/value pairs */ - uint32_t entries; /* Entries */ - } row; -#undef pg_row_d -#define pg_row_d u.row.d -#undef pg_row_entries -#define pg_row_entries u.row.entries + WT_ROW *row; /* Key/value pairs */ +#undef pg_row +#define pg_row u.row /* Fixed-length column-store leaf page. 
*/ - struct { - uint8_t *bitf; /* Values */ - uint32_t entries; /* Entries */ - } col_fix; + uint8_t *fix_bitf; /* Values */ #undef pg_fix_bitf -#define pg_fix_bitf u.col_fix.bitf -#undef pg_fix_entries -#define pg_fix_entries u.col_fix.entries +#define pg_fix_bitf u.fix_bitf /* Variable-length column-store leaf page. */ struct { - WT_COL *d; /* Values */ + WT_COL *col_var; /* Values */ /* - * Variable-length column-store files maintain a list of - * RLE entries on the page so it's unnecessary to walk - * the page counting records to find a specific entry. + * Variable-length column-store pages have an array + * of page entries with RLE counts greater than 1 when + * reading the page, so it's not necessary to walk the + * page counting records to find a specific entry. We + * can do a binary search in this array, then an offset + * calculation to find the cell. + * + * It's a separate structure to keep the page structure + * as small as possible. */ - WT_COL_RLE *repeats; /* RLE array for lookups */ - uint32_t nrepeats; /* Number of repeat slots */ - - uint32_t entries; /* Entries */ + struct __wt_col_var_repeat { + uint32_t nrepeats; /* repeat slots */ + WT_COL_RLE repeats[0]; /* lookup RLE array */ + } *repeats; +#define WT_COL_VAR_REPEAT_SET(page) \ + ((page)->u.col_var.repeats != NULL) } col_var; -#undef pg_var_d -#define pg_var_d u.col_var.d +#undef pg_var +#define pg_var u.col_var.col_var #undef pg_var_repeats -#define pg_var_repeats u.col_var.repeats +#define pg_var_repeats u.col_var.repeats->repeats #undef pg_var_nrepeats -#define pg_var_nrepeats u.col_var.nrepeats -#undef pg_var_entries -#define pg_var_entries u.col_var.entries +#define pg_var_nrepeats u.col_var.repeats->nrepeats } u; /* - * The page's type and flags are positioned at the end of the WT_PAGE - * union, it reduces cache misses in the row-store search function. 
+ * Page entries, type and flags are positioned at the end of the WT_PAGE + * union to reduce cache misses in the row-store search function. + * + * The entries field only applies to leaf pages, internal pages use the + * page-index entries instead. */ + uint32_t entries; /* Leaf page entries */ + #define WT_PAGE_IS_INTERNAL(page) \ ((page)->type == WT_PAGE_COL_INT || (page)->type == WT_PAGE_ROW_INT) #define WT_PAGE_INVALID 0 /* Invalid page */ @@ -618,8 +632,8 @@ struct __wt_page { #define WT_READGEN_START_VALUE 100 #define WT_READGEN_STEP 100 uint64_t read_gen; - /* The evict pass generation for the page */ - uint64_t evict_pass_gen; + + uint64_t evict_pass_gen; /* Eviction pass generation */ size_t memory_footprint; /* Memory attached to the page */ @@ -792,11 +806,11 @@ struct __wt_row { /* On-page key, on-page cell, or off-page WT_IKEY */ * Walk the entries of an in-memory row-store leaf page. */ #define WT_ROW_FOREACH(page, rip, i) \ - for ((i) = (page)->pg_row_entries, \ - (rip) = (page)->pg_row_d; (i) > 0; ++(rip), --(i)) + for ((i) = (page)->entries, \ + (rip) = (page)->pg_row; (i) > 0; ++(rip), --(i)) #define WT_ROW_FOREACH_REVERSE(page, rip, i) \ - for ((i) = (page)->pg_row_entries, \ - (rip) = (page)->pg_row_d + ((page)->pg_row_entries - 1); \ + for ((i) = (page)->entries, \ + (rip) = (page)->pg_row + ((page)->entries - 1); \ (i) > 0; --(rip), --(i)) /* @@ -804,7 +818,7 @@ struct __wt_row { /* On-page key, on-page cell, or off-page WT_IKEY */ * Return the 0-based array offset based on a WT_ROW reference. */ #define WT_ROW_SLOT(page, rip) \ - ((uint32_t)(((WT_ROW *)(rip)) - (page)->pg_row_d)) + ((uint32_t)(((WT_ROW *)(rip)) - (page)->pg_row)) /* * WT_COL -- @@ -828,18 +842,6 @@ struct __wt_col { uint32_t __col_value; }; -/* - * WT_COL_RLE -- - * In variable-length column store leaf pages, we build an array of entries - * with RLE counts greater than 1 when reading the page. 
We can do a binary - * search in this array, then an offset calculation to find the cell. - */ -WT_PACKED_STRUCT_BEGIN(__wt_col_rle) - uint64_t recno; /* Record number of first repeat. */ - uint64_t rle; /* Repeat count. */ - uint32_t indx; /* Slot of entry in col_var.d */ -WT_PACKED_STRUCT_END - /* * WT_COL_PTR, WT_COL_PTR_SET -- * Return/Set a pointer corresponding to the data offset. (If the item does @@ -856,15 +858,15 @@ WT_PACKED_STRUCT_END * Walk the entries of variable-length column-store leaf page. */ #define WT_COL_FOREACH(page, cip, i) \ - for ((i) = (page)->pg_var_entries, \ - (cip) = (page)->pg_var_d; (i) > 0; ++(cip), --(i)) + for ((i) = (page)->entries, \ + (cip) = (page)->pg_var; (i) > 0; ++(cip), --(i)) /* * WT_COL_SLOT -- * Return the 0-based array offset based on a WT_COL reference. */ #define WT_COL_SLOT(page, cip) \ - ((uint32_t)(((WT_COL *)cip) - (page)->pg_var_d)) + ((uint32_t)(((WT_COL *)cip) - (page)->pg_var)) /* * WT_IKEY -- @@ -1041,7 +1043,7 @@ struct __wt_insert_head { #define WT_ROW_INSERT_SMALLEST(page) \ ((page)->modify == NULL || \ (page)->modify->mod_row_insert == NULL ? 
\ - NULL : (page)->modify->mod_row_insert[(page)->pg_row_entries]) + NULL : (page)->modify->mod_row_insert[(page)->entries]) /* * The column-store leaf page update lists are arrays of pointers to structures, diff --git a/src/include/btree.h b/src/include/btree.h index c89e3c36c20..d742310bf8f 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -114,7 +114,7 @@ struct __wt_btree { int split_pct; /* Split page percent */ WT_COMPRESSOR *compressor; /* Page compressor */ WT_KEYED_ENCRYPTOR *kencryptor; /* Page encryptor */ - WT_RWLOCK *ovfl_lock; /* Overflow lock */ + WT_RWLOCK ovfl_lock; /* Overflow lock */ uint64_t last_recno; /* Column-store last record number */ diff --git a/src/include/btree.i b/src/include/btree.i index fba6ee8e38a..09fa8df8c56 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1008,7 +1008,7 @@ __wt_cursor_row_leaf_key(WT_CURSOR_BTREE *cbt, WT_ITEM *key) if (cbt->ins == NULL) { session = (WT_SESSION_IMPL *)cbt->iface.session; page = cbt->ref->page; - rip = &page->u.row.d[cbt->slot]; + rip = &page->pg_row[cbt->slot]; WT_RET(__wt_row_leaf_key(session, page, rip, key, false)); } else { key->data = WT_INSERT_KEY(cbt->ins); @@ -1207,9 +1207,9 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) */ ins_head = page->type == WT_PAGE_ROW_LEAF ? - (page->pg_row_entries == 0 ? + (page->entries == 0 ? WT_ROW_INSERT_SMALLEST(page) : - WT_ROW_INSERT_SLOT(page, page->pg_row_entries - 1)) : + WT_ROW_INSERT_SLOT(page, page->entries - 1)) : WT_COL_APPEND(page); if (ins_head == NULL) return (false); diff --git a/src/include/column.i b/src/include/column.i index d15f874b281..c1b45a1f4e0 100644 --- a/src/include/column.i +++ b/src/include/column.i @@ -221,13 +221,13 @@ __col_var_last_recno(WT_REF *ref) * This function ignores those records, our callers must handle that * explicitly, if they care. */ - if (page->pg_var_nrepeats == 0) - return (page->pg_var_entries == 0 ? 
0 : - ref->ref_recno + (page->pg_var_entries - 1)); + if (!WT_COL_VAR_REPEAT_SET(page)) + return (page->entries == 0 ? 0 : + ref->ref_recno + (page->entries - 1)); repeat = &page->pg_var_repeats[page->pg_var_nrepeats - 1]; return ((repeat->recno + repeat->rle) - 1 + - (page->pg_var_entries - (repeat->indx + 1))); + (page->entries - (repeat->indx + 1))); } /* @@ -246,8 +246,7 @@ __col_fix_last_recno(WT_REF *ref) * This function ignores those records, our callers must handle that * explicitly, if they care. */ - return (page->pg_fix_entries == 0 ? - 0 : ref->ref_recno + (page->pg_fix_entries - 1)); + return (page->entries == 0 ? 0 : ref->ref_recno + (page->entries - 1)); } /* @@ -273,7 +272,9 @@ __col_var_search(WT_REF *ref, uint64_t recno, uint64_t *start_recnop) * slot for this record number, because we know any intervening records * have repeat counts of 1. */ - for (base = 0, limit = page->pg_var_nrepeats; limit != 0; limit >>= 1) { + for (base = 0, + limit = WT_COL_VAR_REPEAT_SET(page) ? page->pg_var_nrepeats : 0; + limit != 0; limit >>= 1) { indx = base + (limit >> 1); repeat = page->pg_var_repeats + indx; @@ -281,7 +282,7 @@ __col_var_search(WT_REF *ref, uint64_t recno, uint64_t *start_recnop) recno < repeat->recno + repeat->rle) { if (start_recnop != NULL) *start_recnop = repeat->recno; - return (page->pg_var_d + repeat->indx); + return (page->pg_var + repeat->indx); } if (recno < repeat->recno) continue; @@ -306,14 +307,14 @@ __col_var_search(WT_REF *ref, uint64_t recno, uint64_t *start_recnop) * !!! * The test could be written more simply as: * - * (recno >= start_recno + (page->pg_var_entries - start_indx)) + * (recno >= start_recno + (page->entries - start_indx)) * * It's split into two parts because the simpler test will overflow if * searching for large record numbers. 
*/ if (recno >= start_recno && - recno - start_recno >= page->pg_var_entries - start_indx) + recno - start_recno >= page->entries - start_indx) return (NULL); - return (page->pg_var_d + start_indx + (uint32_t)(recno - start_recno)); + return (page->pg_var + start_indx + (uint32_t)(recno - start_recno)); } diff --git a/src/include/connection.h b/src/include/connection.h index 60ce5f55234..6818633d816 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -262,7 +262,7 @@ struct __wt_connection_impl { WT_TXN_GLOBAL txn_global; /* Global transaction state */ - WT_RWLOCK *hot_backup_lock; /* Hot backup serialization */ + WT_RWLOCK hot_backup_lock; /* Hot backup serialization */ bool hot_backup; /* Hot backup in progress */ char **hot_backup_list; /* Hot backup file list */ diff --git a/src/include/cursor.h b/src/include/cursor.h index d522abc2a56..31c8963a486 100644 --- a/src/include/cursor.h +++ b/src/include/cursor.h @@ -52,8 +52,8 @@ { 0 }, /* recno raw buffer */ \ NULL, /* json_private */ \ NULL, /* lang_private */ \ - { NULL, 0, 0, NULL, 0 }, /* WT_ITEM key */ \ - { NULL, 0, 0, NULL, 0 }, /* WT_ITEM value */ \ + { NULL, 0, NULL, 0, 0 }, /* WT_ITEM key */ \ + { NULL, 0, NULL, 0, 0 }, /* WT_ITEM value */ \ 0, /* int saved_err */ \ NULL, /* internal_uri */ \ 0 /* uint32_t flags */ \ diff --git a/src/include/dhandle.h b/src/include/dhandle.h index d7802bb319b..dcc788f0839 100644 --- a/src/include/dhandle.h +++ b/src/include/dhandle.h @@ -42,7 +42,7 @@ * A handle for a generic named data source. 
*/ struct __wt_data_handle { - WT_RWLOCK *rwlock; /* Lock for shared/exclusive ops */ + WT_RWLOCK rwlock; /* Lock for shared/exclusive ops */ TAILQ_ENTRY(__wt_data_handle) q; TAILQ_ENTRY(__wt_data_handle) hashq; diff --git a/src/include/extern.h b/src/include/extern.h index bb7fbddcae5..bcad3580e25 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -671,16 +671,16 @@ extern void __wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg) WT_G extern void __wt_print_huffman_code(void *huffman_arg, uint16_t symbol) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_rwlock_alloc( WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_readlock_spin(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern 
void __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern bool __wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_readlock_spin(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern bool __wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern uint32_t __wt_nlpo2_round(uint32_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern uint32_t __wt_nlpo2(uint32_t v) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern uint32_t __wt_log2_int(uint32_t n) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/log.h b/src/include/log.h index 3f2cb2ba8e6..d9fea892c68 100644 --- a/src/include/log.h +++ b/src/include/log.h @@ -235,7 +235,7 @@ struct __wt_log { WT_SPINLOCK log_sync_lock; /* Locked: Single-thread fsync */ WT_SPINLOCK log_writelsn_lock; /* Locked: write LSN */ - WT_RWLOCK *log_archive_lock; /* Archive and log cursors */ + WT_RWLOCK log_archive_lock;/* Archive and log cursors */ /* Notify any waiting threads when sync_lsn is updated. */ WT_CONDVAR *log_sync_cond; diff --git a/src/include/lsm.h b/src/include/lsm.h index fefed9daa81..2bbb813bad2 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -189,7 +189,7 @@ struct __wt_lsm_tree { #define LSM_TREE_MAX_QUEUE 100 uint32_t queue_ref; - WT_RWLOCK *rwlock; + WT_RWLOCK rwlock; TAILQ_ENTRY(__wt_lsm_tree) q; uint64_t dsk_gen; diff --git a/src/include/mutex.h b/src/include/mutex.h index 6b81b1a6265..727a690bb1c 100644 --- a/src/include/mutex.h +++ b/src/include/mutex.h @@ -30,11 +30,14 @@ struct __wt_condvar { }; /* + * Read/write locks: + * + * WiredTiger uses read/write locks for shared/exclusive access to resources. * !!! * Don't modify this structure without understanding the read/write locking * functions. */ -typedef union { /* Read/write lock */ +union __wt_rwlock { /* Read/write lock */ uint64_t u; struct { uint32_t wr; /* Writers and readers */ @@ -45,19 +48,6 @@ typedef union { /* Read/write lock */ uint16_t next; /* Next available ticket number */ uint16_t writers_active;/* Count of active writers */ } s; -} wt_rwlock_t; - -/* - * Read/write locks: - * - * WiredTiger uses read/write locks for shared/exclusive access to resources. 
- */ -struct __wt_rwlock { - WT_CACHE_LINE_PAD_BEGIN - const char *name; /* Lock name for debugging */ - - wt_rwlock_t rwlock; /* Read/write lock */ - WT_CACHE_LINE_PAD_END }; /* diff --git a/src/include/thread_group.h b/src/include/thread_group.h index 76758a090c4..77cff00dc8d 100644 --- a/src/include/thread_group.h +++ b/src/include/thread_group.h @@ -40,7 +40,7 @@ struct __wt_thread_group { const char *name; /* Name */ - WT_RWLOCK *lock; /* Protects group changes */ + WT_RWLOCK lock; /* Protects group changes */ /* * Condition signalled when wanting to wake up threads that are diff --git a/src/include/txn.h b/src/include/txn.h index 12fc2a0a5b7..7e802c188ab 100644 --- a/src/include/txn.h +++ b/src/include/txn.h @@ -92,7 +92,7 @@ struct __wt_txn_global { * Prevents the oldest ID moving forwards while threads are scanning * the global transaction state. */ - WT_RWLOCK *scan_rwlock; + WT_RWLOCK scan_rwlock; /* * Track information about the running checkpoint. The transaction @@ -114,7 +114,7 @@ struct __wt_txn_global { volatile uint64_t metadata_pinned; /* Oldest ID for metadata */ /* Named snapshot state. 
*/ - WT_RWLOCK *nsnap_rwlock; + WT_RWLOCK nsnap_rwlock; volatile uint64_t nsnap_oldest_id; TAILQ_HEAD(__wt_nsnap_qh, __wt_named_snapshot) nsnaph; diff --git a/src/include/verify_build.h b/src/include/verify_build.h index 8abc192892e..640f5e4cf5f 100644 --- a/src/include/verify_build.h +++ b/src/include/verify_build.h @@ -59,7 +59,6 @@ __wt_verify_build(void) sizeof(s) > WT_CACHE_LINE_ALIGNMENT || \ sizeof(s) % WT_CACHE_LINE_ALIGNMENT == 0) WT_PADDING_CHECK(WT_LOGSLOT); - WT_PADDING_CHECK(WT_RWLOCK); WT_PADDING_CHECK(WT_SPINLOCK); WT_PADDING_CHECK(WT_TXN_STATE); diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index f9e232e0310..9ee28317bc4 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -114,16 +114,16 @@ struct __wt_item { size_t size; #ifndef DOXYGEN -#define WT_ITEM_ALIGNED 0x00000001 -#define WT_ITEM_INUSE 0x00000002 - /* This appears in the middle of the struct to avoid padding. */ - /*! Object flags (internal use). */ - uint32_t flags; - /*! Managed memory chunk (internal use). */ void *mem; + /*! Managed memory size (internal use). */ size_t memsize; + +#define WT_ITEM_ALIGNED 0x00000001 +#define WT_ITEM_INUSE 0x00000002 + /*! Object flags (internal use). 
*/ + uint32_t flags; #endif }; diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h index e18563dd2d2..da318ad8a86 100644 --- a/src/include/wt_internal.h +++ b/src/include/wt_internal.h @@ -106,6 +106,8 @@ struct __wt_col; typedef struct __wt_col WT_COL; struct __wt_col_rle; typedef struct __wt_col_rle WT_COL_RLE; +struct __wt_col_var_repeat; + typedef struct __wt_col_var_repeat WT_COL_VAR_REPEAT; struct __wt_colgroup; typedef struct __wt_colgroup WT_COLGROUP; struct __wt_compact_state; @@ -266,8 +268,6 @@ struct __wt_ref; typedef struct __wt_ref WT_REF; struct __wt_row; typedef struct __wt_row WT_ROW; -struct __wt_rwlock; - typedef struct __wt_rwlock WT_RWLOCK; struct __wt_salvage_cookie; typedef struct __wt_salvage_cookie WT_SALVAGE_COOKIE; struct __wt_save_upd; @@ -302,6 +302,8 @@ union __wt_lsn; typedef union __wt_lsn WT_LSN; union __wt_rand_state; typedef union __wt_rand_state WT_RAND_STATE; +union __wt_rwlock; + typedef union __wt_rwlock WT_RWLOCK; /* * Forward type declarations for internal types: END * DO NOT EDIT: automatically built by dist/s_typedef. diff --git a/src/log/log.c b/src/log/log.c index 413df312a15..fb3935abf81 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -895,12 +895,12 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) */ create_log = true; if (conn->log_prealloc > 0 && !conn->hot_backup) { - __wt_readlock(session, conn->hot_backup_lock); + __wt_readlock(session, &conn->hot_backup_lock); if (conn->hot_backup) - __wt_readunlock(session, conn->hot_backup_lock); + __wt_readunlock(session, &conn->hot_backup_lock); else { ret = __log_alloc_prealloc(session, log->fileid); - __wt_readunlock(session, conn->hot_backup_lock); + __wt_readunlock(session, &conn->hot_backup_lock); /* * If ret is 0 it means we found a pre-allocated file. 
@@ -1029,12 +1029,12 @@ __log_truncate_file(WT_SESSION_IMPL *session, WT_FH *log_fh, wt_off_t offset) log = conn->log; if (!F_ISSET(log, WT_LOG_TRUNCATE_NOTSUP) && !conn->hot_backup) { - __wt_readlock(session, conn->hot_backup_lock); + __wt_readlock(session, &conn->hot_backup_lock); if (conn->hot_backup) - __wt_readunlock(session, conn->hot_backup_lock); + __wt_readunlock(session, &conn->hot_backup_lock); else { ret = __wt_ftruncate(session, log_fh, offset); - __wt_readunlock(session, conn->hot_backup_lock); + __wt_readunlock(session, &conn->hot_backup_lock); if (ret != ENOTSUP) return (ret); F_SET(log, WT_LOG_TRUNCATE_NOTSUP); diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 839648b97d7..a2511f48e2b 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -304,7 +304,7 @@ __clsm_leave(WT_CURSOR_LSM *clsm) * byte, if the application uses two leading DC4 byte for some reason, we'll do * a wasted data copy each time a new value is inserted into the object. */ -static const WT_ITEM __tombstone = { "\x14\x14", 2, 0, NULL, 0 }; +static const WT_ITEM __tombstone = { "\x14\x14", 2, NULL, 0, 0 }; /* * __clsm_deleted -- diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 38d87dd852b..71a981a6284 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -469,7 +469,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session, /* Try to open the tree. 
*/ WT_RET(__wt_calloc_one(session, &lsm_tree)); - WT_ERR(__wt_rwlock_alloc(session, &lsm_tree->rwlock, "lsm tree")); + __wt_rwlock_init(session, &lsm_tree->rwlock); WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri)); @@ -1082,7 +1082,7 @@ err: if (locked) void __wt_lsm_tree_readlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) { - __wt_readlock(session, lsm_tree->rwlock); + __wt_readlock(session, &lsm_tree->rwlock); /* * Diagnostic: avoid deadlocks with the schema lock: if we need it for @@ -1100,7 +1100,7 @@ __wt_lsm_tree_readunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) { F_CLR(session, WT_SESSION_NO_EVICTION | WT_SESSION_NO_SCHEMA_LOCK); - __wt_readunlock(session, lsm_tree->rwlock); + __wt_readunlock(session, &lsm_tree->rwlock); } /* @@ -1110,7 +1110,7 @@ __wt_lsm_tree_readunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) void __wt_lsm_tree_writelock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) { - __wt_writelock(session, lsm_tree->rwlock); + __wt_writelock(session, &lsm_tree->rwlock); /* * Diagnostic: avoid deadlocks with the schema lock: if we need it for @@ -1128,7 +1128,7 @@ __wt_lsm_tree_writeunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) { F_CLR(session, WT_SESSION_NO_EVICTION | WT_SESSION_NO_SCHEMA_LOCK); - __wt_writeunlock(session, lsm_tree->rwlock); + __wt_writeunlock(session, &lsm_tree->rwlock); } /* diff --git a/src/reconcile/rec_track.c b/src/reconcile/rec_track.c index 3795b6e5ae8..5bf425b1b21 100644 --- a/src/reconcile/rec_track.c +++ b/src/reconcile/rec_track.c @@ -875,9 +875,9 @@ __wt_ovfl_track_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page) WT_RET(__ovfl_reuse_wrapup(session, page)); if (track->ovfl_txnc[0] != NULL) { - __wt_writelock(session, S2BT(session)->ovfl_lock); + __wt_writelock(session, &S2BT(session)->ovfl_lock); ret = __ovfl_txnc_wrapup(session, page); - __wt_writeunlock(session, S2BT(session)->ovfl_lock); + __wt_writeunlock(session, &S2BT(session)->ovfl_lock); } return (ret); } @@ -903,9 +903,9 @@ 
__wt_ovfl_track_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page) WT_RET(__ovfl_reuse_wrapup_err(session, page)); if (track->ovfl_txnc[0] != NULL) { - __wt_writelock(session, S2BT(session)->ovfl_lock); + __wt_writelock(session, &S2BT(session)->ovfl_lock); ret = __ovfl_txnc_wrapup(session, page); - __wt_writeunlock(session, S2BT(session)->ovfl_lock); + __wt_writeunlock(session, &S2BT(session)->ovfl_lock); } return (ret); } diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index e82f449a50d..a667a288187 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -4069,7 +4069,7 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) /* Copy the original, disk-image bytes into place. */ memcpy(r->first_free, page->pg_fix_bitf, - __bitstr_size((size_t)page->pg_fix_entries * btree->bitcnt)); + __bitstr_size((size_t)page->entries * btree->bitcnt)); /* Update any changes to the original on-page data items. */ WT_SKIP_FOREACH(ins, WT_COL_UPDATE_SINGLE(page)) { @@ -4081,9 +4081,8 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) } /* Calculate the number of entries per page remainder. */ - entry = page->pg_fix_entries; - nrecs = WT_FIX_BYTES_TO_ENTRIES( - btree, r->space_avail) - page->pg_fix_entries; + entry = page->entries; + nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail) - page->entries; r->recno += entry; /* Walk any append list. */ @@ -4206,7 +4205,7 @@ __rec_col_fix_slvg(WT_SESSION_IMPL *session, session, r, page, pageref->ref_recno, btree->maxleafpage)); /* We may not be taking all of the entries on the original page. */ - page_take = salvage->take == 0 ? page->pg_fix_entries : salvage->take; + page_take = salvage->take == 0 ? page->entries : salvage->take; page_start = salvage->skip == 0 ? 0 : salvage->skip; /* Calculate the number of entries per page. 
*/ diff --git a/src/schema/schema_util.c b/src/schema/schema_util.c index 433224a868e..9de4b916a79 100644 --- a/src/schema/schema_util.c +++ b/src/schema/schema_util.c @@ -26,7 +26,7 @@ __wt_schema_backup_check(WT_SESSION_IMPL *session, const char *name) conn = S2C(session); if (!conn->hot_backup) return (0); - __wt_readlock(session, conn->hot_backup_lock); + __wt_readlock(session, &conn->hot_backup_lock); /* * There is a window at the end of a backup where the list has been * cleared from the connection but the flag is still set. It is safe @@ -34,7 +34,7 @@ __wt_schema_backup_check(WT_SESSION_IMPL *session, const char *name) */ if (!conn->hot_backup || (backup_list = conn->hot_backup_list) == NULL) { - __wt_readunlock(session, conn->hot_backup_lock); + __wt_readunlock(session, &conn->hot_backup_lock); return (0); } for (i = 0; backup_list[i] != NULL; ++i) { @@ -43,7 +43,7 @@ __wt_schema_backup_check(WT_SESSION_IMPL *session, const char *name) break; } } - __wt_readunlock(session, conn->hot_backup_lock); + __wt_readunlock(session, &conn->hot_backup_lock); return (ret); } diff --git a/src/session/session_api.c b/src/session/session_api.c index fe1bf821d3b..fcbfa8809b3 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -1686,7 +1686,7 @@ __session_snapshot(WT_SESSION *wt_session, const char *config) WT_ERR(__wt_txn_named_snapshot_config( session, cfg, &has_create, &has_drop)); - __wt_writelock(session, txn_global->nsnap_rwlock); + __wt_writelock(session, &txn_global->nsnap_rwlock); /* Drop any snapshots to be removed first. 
*/ if (has_drop) @@ -1696,7 +1696,7 @@ __session_snapshot(WT_SESSION *wt_session, const char *config) if (has_create) WT_ERR(__wt_txn_named_snapshot_begin(session, cfg)); -err: __wt_writeunlock(session, txn_global->nsnap_rwlock); +err: __wt_writeunlock(session, &txn_global->nsnap_rwlock); API_END_RET_NOTFOUND_MAP(session, ret); } diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c index 732dc797b6d..f1251794b89 100644 --- a/src/session/session_dhandle.c +++ b/src/session/session_dhandle.c @@ -181,17 +181,17 @@ __wt_session_lock_dhandle( */ if (F_ISSET(dhandle, WT_DHANDLE_OPEN) && (!want_exclusive || lock_busy)) { - __wt_readlock(session, dhandle->rwlock); + __wt_readlock(session, &dhandle->rwlock); if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) { *is_deadp = 1; - __wt_readunlock(session, dhandle->rwlock); + __wt_readunlock(session, &dhandle->rwlock); return (0); } is_open = F_ISSET(dhandle, WT_DHANDLE_OPEN); if (is_open && !want_exclusive) return (0); - __wt_readunlock(session, dhandle->rwlock); + __wt_readunlock(session, &dhandle->rwlock); } else is_open = false; @@ -201,10 +201,11 @@ __wt_session_lock_dhandle( * with another thread that successfully opens the file, we * don't want to block waiting to get exclusive access. 
*/ - if ((ret = __wt_try_writelock(session, dhandle->rwlock)) == 0) { + if ((ret = + __wt_try_writelock(session, &dhandle->rwlock)) == 0) { if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) { *is_deadp = 1; - __wt_writeunlock(session, dhandle->rwlock); + __wt_writeunlock(session, &dhandle->rwlock); return (0); } @@ -215,7 +216,7 @@ __wt_session_lock_dhandle( if (F_ISSET(dhandle, WT_DHANDLE_OPEN) && !want_exclusive) { lock_busy = false; - __wt_writeunlock(session, dhandle->rwlock); + __wt_writeunlock(session, &dhandle->rwlock); continue; } @@ -286,9 +287,9 @@ __wt_session_release_btree(WT_SESSION_IMPL *session) if (locked) { if (write_locked) { F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE); - __wt_writeunlock(session, dhandle->rwlock); + __wt_writeunlock(session, &dhandle->rwlock); } else - __wt_readunlock(session, dhandle->rwlock); + __wt_readunlock(session, &dhandle->rwlock); } session->dhandle = NULL; @@ -509,7 +510,7 @@ __wt_session_get_btree(WT_SESSION_IMPL *session, dhandle->excl_session = NULL; dhandle->excl_ref = 0; F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE); - __wt_writeunlock(session, dhandle->rwlock); + __wt_writeunlock(session, &dhandle->rwlock); WT_WITH_SCHEMA_LOCK(session, WT_WITH_HANDLE_LIST_LOCK(session, @@ -531,7 +532,7 @@ __wt_session_get_btree(WT_SESSION_IMPL *session, dhandle->excl_session = NULL; dhandle->excl_ref = 0; F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE); - __wt_writeunlock(session, dhandle->rwlock); + __wt_writeunlock(session, &dhandle->rwlock); WT_RET(ret); } diff --git a/src/support/mtx_rw.c b/src/support/mtx_rw.c index ea18f556257..35ad5da23f2 100644 --- a/src/support/mtx_rw.c +++ b/src/support/mtx_rw.c @@ -115,23 +115,27 @@ #include "wt_internal.h" /* - * __wt_rwlock_alloc -- - * Allocate and initialize a read/write lock. + * __wt_rwlock_init -- + * Initialize a read/write lock. 
*/ -int -__wt_rwlock_alloc( - WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp, const char *name) +void +__wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l) { - WT_RWLOCK *rwlock; - - __wt_verbose(session, WT_VERB_MUTEX, "rwlock: alloc %s", name); + WT_UNUSED(session); - WT_RET(__wt_calloc_one(session, &rwlock)); + l->u = 0; +} - rwlock->name = name; +/* + * __wt_rwlock_destroy -- + * Destroy a read/write lock. + */ +void +__wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK *l) +{ + WT_UNUSED(session); - *rwlockp = rwlock; - return (0); + l->u = 0; } /* @@ -139,13 +143,12 @@ __wt_rwlock_alloc( * Try to get a shared lock, fail immediately if unavailable. */ int -__wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) +__wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) { - wt_rwlock_t *l, new, old; + WT_RWLOCK new, old; WT_STAT_CONN_INCR(session, rwlock_read); - l = &rwlock->rwlock; new = old = *l; /* @@ -172,19 +175,15 @@ __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) * exclusive. */ void -__wt_readlock_spin(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) +__wt_readlock_spin(WT_SESSION_IMPL *session, WT_RWLOCK *l) { - wt_rwlock_t *l; - - l = &rwlock->rwlock; - /* * Try to get the lock in a single operation if it is available to * readers. This avoids the situation where multiple readers arrive * concurrently and have to line up in order to enter the lock. For * read-heavy workloads it can make a significant difference. */ - while (__wt_try_readlock(session, rwlock) != 0) { + while (__wt_try_readlock(session, l) != 0) { if (l->s.writers_active > 0) __wt_yield(); else @@ -197,9 +196,8 @@ __wt_readlock_spin(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) * Get a shared lock. 
*/ void -__wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) +__wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) { - wt_rwlock_t *l; uint16_t ticket; int pause_cnt; @@ -207,8 +205,6 @@ __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) WT_DIAGNOSTIC_YIELD; - l = &rwlock->rwlock; - /* * Possibly wrap: if we have more than 64K lockers waiting, the ticket * value will wrap and two lockers will simultaneously be granted the @@ -246,14 +242,10 @@ __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) * Release a shared lock. */ void -__wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) +__wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) { - wt_rwlock_t *l; - WT_UNUSED(session); - l = &rwlock->rwlock; - /* * Increment the writers value (other readers are doing the same, make * sure we don't race). @@ -266,13 +258,12 @@ __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) * Try to get an exclusive lock, fail immediately if unavailable. */ int -__wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) +__wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) { - wt_rwlock_t *l, new, old; + WT_RWLOCK new, old; WT_STAT_CONN_INCR(session, rwlock_write); - l = &rwlock->rwlock; old = new = *l; /* @@ -296,16 +287,13 @@ __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) * Wait to get an exclusive lock. */ void -__wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) +__wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) { - wt_rwlock_t *l; uint16_t ticket; int pause_cnt; WT_STAT_CONN_INCR(session, rwlock_write); - l = &rwlock->rwlock; - /* * Possibly wrap: if we have more than 64K lockers waiting, the ticket * value will wrap and two lockers will simultaneously be granted the @@ -338,13 +326,12 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) * Release an exclusive lock. 
*/ void -__wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) +__wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) { - wt_rwlock_t *l, new; + WT_RWLOCK new; WT_UNUSED(session); - l = &rwlock->rwlock; (void)__wt_atomic_sub16(&l->s.writers_active, 1); /* @@ -368,40 +355,16 @@ __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) WT_DIAGNOSTIC_YIELD; } -/* - * __wt_rwlock_destroy -- - * Destroy a read/write lock. - */ -void -__wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp) -{ - WT_RWLOCK *rwlock; - - rwlock = *rwlockp; /* Clear our caller's reference. */ - if (rwlock == NULL) - return; - *rwlockp = NULL; - - __wt_verbose( - session, WT_VERB_MUTEX, "rwlock: destroy %s", rwlock->name); - - __wt_free(session, rwlock); -} - #ifdef HAVE_DIAGNOSTIC /* * __wt_rwlock_islocked -- * Return if a read/write lock is currently locked for reading or writing. */ bool -__wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock) +__wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *l) { - wt_rwlock_t *l; - WT_UNUSED(session); - l = &rwlock->rwlock; - return (l->s.writers != l->s.next || l->s.readers != l->s.next); } #endif diff --git a/src/support/thread_group.c b/src/support/thread_group.c index a866d2d01c5..a89468c367a 100644 --- a/src/support/thread_group.c +++ b/src/support/thread_group.c @@ -50,8 +50,7 @@ __thread_group_grow( { WT_THREAD *thread; - WT_ASSERT(session, - __wt_rwlock_islocked(session, group->lock)); + WT_ASSERT(session, __wt_rwlock_islocked(session, &group->lock)); /* * Any bounds checking is done by the caller so we know that @@ -84,8 +83,7 @@ __thread_group_shrink(WT_SESSION_IMPL *session, WT_THREAD *thread; uint32_t current_slot; - WT_ASSERT(session, - __wt_rwlock_islocked(session, group->lock)); + WT_ASSERT(session, __wt_rwlock_islocked(session, &group->lock)); for (current_slot = group->alloc; current_slot > new_count; ) { /* @@ -142,7 +140,7 @@ __thread_group_resize( WT_ASSERT(session, group->current_threads <= 
group->alloc && - __wt_rwlock_islocked(session, group->lock)); + __wt_rwlock_islocked(session, &group->lock)); if (new_min == group->min && new_max == group->max) return (0); @@ -227,9 +225,9 @@ __wt_thread_group_resize( " from max: %" PRIu32 " -> %" PRIu32, (void *)group, group->min, new_min, group->max, new_max); - __wt_writelock(session, group->lock); + __wt_writelock(session, &group->lock); WT_TRET(__thread_group_resize(session, group, new_min, new_max, flags)); - __wt_writeunlock(session, group->lock); + __wt_writeunlock(session, &group->lock); return (ret); } @@ -255,17 +253,17 @@ __wt_thread_group_create( __wt_verbose(session, WT_VERB_THREAD_GROUP, "Creating thread group: %p", (void *)group); - WT_RET(__wt_rwlock_alloc(session, &group->lock, "Thread group")); + __wt_rwlock_init(session, &group->lock); WT_ERR(__wt_cond_alloc( session, "Thread group cond", false, &group->wait_cond)); cond_alloced = true; - __wt_writelock(session, group->lock); + __wt_writelock(session, &group->lock); group->run_func = run_func; group->name = name; WT_TRET(__thread_group_resize(session, group, min, max, flags)); - __wt_writeunlock(session, group->lock); + __wt_writeunlock(session, &group->lock); /* Cleanup on error to avoid leaking resources */ err: if (ret != 0) { @@ -288,7 +286,7 @@ __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) __wt_verbose(session, WT_VERB_THREAD_GROUP, "Destroying thread group: %p", (void *)group); - WT_ASSERT(session, __wt_rwlock_islocked(session, group->lock)); + WT_ASSERT(session, __wt_rwlock_islocked(session, &group->lock)); /* Shut down all threads and free associated resources. 
*/ WT_TRET(__thread_group_shrink(session, group, 0)); @@ -322,15 +320,15 @@ __wt_thread_group_start_one( return (0); if (wait) - __wt_writelock(session, group->lock); - else if (__wt_try_writelock(session, group->lock) != 0) + __wt_writelock(session, &group->lock); + else if (__wt_try_writelock(session, &group->lock) != 0) return (0); /* Recheck the bounds now that we hold the lock */ if (group->current_threads < group->max) WT_TRET(__thread_group_grow( session, group, group->current_threads + 1)); - __wt_writeunlock(session, group->lock); + __wt_writeunlock(session, &group->lock); return (ret); } diff --git a/src/txn/txn.c b/src/txn/txn.c index 26a0ed679e2..660d37b17d5 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -126,7 +126,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) n = 0; /* We're going to scan the table: wait for the lock. */ - __wt_readlock_spin(session, txn_global->scan_rwlock); + __wt_readlock_spin(session, &txn_global->scan_rwlock); current_id = pinned_id = txn_global->current; prev_oldest_id = txn_global->oldest_id; @@ -180,7 +180,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id); txn_state->pinned_id = pinned_id; -done: __wt_readunlock(session, txn_global->scan_rwlock); +done: __wt_readunlock(session, &txn_global->scan_rwlock); __txn_sort_snapshot(session, n, current_id); } @@ -293,13 +293,13 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) /* First do a read-only scan. */ if (wait) - __wt_readlock_spin(session, txn_global->scan_rwlock); + __wt_readlock_spin(session, &txn_global->scan_rwlock); else if ((ret = - __wt_try_readlock(session, txn_global->scan_rwlock)) != 0) + __wt_try_readlock(session, &txn_global->scan_rwlock)) != 0) return (ret == EBUSY ? 
0 : ret); __txn_oldest_scan(session, &oldest_id, &last_running, &metadata_pinned, &oldest_session); - __wt_readunlock(session, txn_global->scan_rwlock); + __wt_readunlock(session, &txn_global->scan_rwlock); /* * If the state hasn't changed (or hasn't moved far enough for @@ -314,9 +314,9 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) /* It looks like an update is necessary, wait for exclusive access. */ if (wait) - __wt_writelock(session, txn_global->scan_rwlock); + __wt_writelock(session, &txn_global->scan_rwlock); else if ((ret = - __wt_try_writelock(session, txn_global->scan_rwlock)) != 0) + __wt_try_writelock(session, &txn_global->scan_rwlock)) != 0) return (ret == EBUSY ? 0 : ret); /* @@ -375,7 +375,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) #endif } -done: __wt_writeunlock(session, txn_global->scan_rwlock); +done: __wt_writeunlock(session, &txn_global->scan_rwlock); return (ret); } @@ -768,10 +768,8 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]) WT_RET(__wt_spin_init(session, &txn_global->id_lock, "transaction id lock")); - WT_RET(__wt_rwlock_alloc(session, - &txn_global->scan_rwlock, "transaction scan lock")); - WT_RET(__wt_rwlock_alloc(session, - &txn_global->nsnap_rwlock, "named snapshot lock")); + __wt_rwlock_init(session, &txn_global->scan_rwlock); + __wt_rwlock_init(session, &txn_global->nsnap_rwlock); txn_global->nsnap_oldest_id = WT_TXN_NONE; TAILQ_INIT(&txn_global->nsnaph); diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 399d9187d82..3b19162fd3d 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -679,7 +679,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) * This allows ordinary visibility checks to move forward because * checkpoints often take a long time and only write to the metadata. 
*/ - __wt_writelock(session, txn_global->scan_rwlock); + __wt_writelock(session, &txn_global->scan_rwlock); txn_global->checkpoint_txnid = txn->id; txn_global->checkpoint_pinned = WT_MIN(txn->id, txn->snap_min); @@ -700,7 +700,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) */ txn_state->id = txn_state->pinned_id = txn_state->metadata_pinned = WT_TXN_NONE; - __wt_writeunlock(session, txn_global->scan_rwlock); + __wt_writeunlock(session, &txn_global->scan_rwlock); /* * Unblock updates -- we can figure out that any updates to clean pages @@ -1159,7 +1159,7 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session, * Hold the lock until we're done (blocking hot backups from starting), * we don't want to race with a future hot backup. */ - __wt_readlock(session, conn->hot_backup_lock); + __wt_readlock(session, &conn->hot_backup_lock); hot_backup_locked = true; if (conn->hot_backup) WT_CKPT_FOREACH(ckptbase, ckpt) { @@ -1233,7 +1233,7 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session, WT_ASSERT(session, !is_checkpoint || !F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)); - __wt_readunlock(session, conn->hot_backup_lock); + __wt_readunlock(session, &conn->hot_backup_lock); WT_ASSERT(session, btree->ckpt == NULL); btree->ckpt = ckptbase; @@ -1241,7 +1241,7 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session, return (0); err: if (hot_backup_locked) - __wt_readunlock(session, conn->hot_backup_lock); + __wt_readunlock(session, &conn->hot_backup_lock); __wt_meta_ckptlist_free(session, ckptbase); __wt_free(session, name_alloc); diff --git a/src/txn/txn_nsnap.c b/src/txn/txn_nsnap.c index 65ec1a6662f..659570dbcd9 100644 --- a/src/txn/txn_nsnap.c +++ b/src/txn/txn_nsnap.c @@ -211,9 +211,9 @@ __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[]) if (TAILQ_EMPTY(&txn_global->nsnaph)) { WT_ASSERT(session, txn_global->nsnap_oldest_id == WT_TXN_NONE && !__wt_txn_visible_all(session, nsnap_new->pinned_id)); - __wt_readlock(session, txn_global->scan_rwlock); + 
__wt_readlock(session, &txn_global->scan_rwlock); txn_global->nsnap_oldest_id = nsnap_new->pinned_id; - __wt_readunlock(session, txn_global->scan_rwlock); + __wt_readunlock(session, &txn_global->scan_rwlock); } TAILQ_INSERT_TAIL(&txn_global->nsnaph, nsnap_new, q); WT_STAT_CONN_INCR(session, txn_snapshots_created); @@ -297,16 +297,16 @@ __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval) if (session->ncursors > 0) WT_RET(__wt_session_copy_values(session)); - __wt_readlock(session, txn_global->nsnap_rwlock); + __wt_readlock(session, &txn_global->nsnap_rwlock); TAILQ_FOREACH(nsnap, &txn_global->nsnaph, q) if (WT_STRING_MATCH(nsnap->name, nameval->str, nameval->len)) { /* * Acquire the scan lock so the oldest ID can't move * forward without seeing our pinned ID. */ - __wt_readlock(session, txn_global->scan_rwlock); + __wt_readlock(session, &txn_global->scan_rwlock); txn_state->pinned_id = nsnap->pinned_id; - __wt_readunlock(session, txn_global->scan_rwlock); + __wt_readunlock(session, &txn_global->scan_rwlock); WT_ASSERT(session, !__wt_txn_visible_all( session, txn_state->pinned_id) && @@ -327,7 +327,7 @@ __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval) F_SET(txn, WT_TXN_HAS_SNAPSHOT); break; } - __wt_readunlock(session, txn_global->nsnap_rwlock); + __wt_readunlock(session, &txn_global->nsnap_rwlock); if (nsnap == NULL) WT_RET_MSG(session, EINVAL, -- cgit v1.2.1 From b99a91ec0fa042c867158f51cfd3a0106d7ac535 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Fri, 23 Dec 2016 17:59:45 +1100 Subject: WT-3086 lint. 
(#3217) --- dist/s_funcs.list | 1 - src/evict/evict_lru.c | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/dist/s_funcs.list b/dist/s_funcs.list index 01835390997..b73767cad13 100644 --- a/dist/s_funcs.list +++ b/dist/s_funcs.list @@ -13,7 +13,6 @@ __wt_bloom_get __wt_bulk_insert_fix __wt_bulk_insert_row __wt_bulk_insert_var -__wt_cache_dump __wt_config_getone __wt_cursor_get_raw_value __wt_debug_addr diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 214b5c007cb..b4cb2cc229a 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -2036,6 +2036,7 @@ __dump_txn_state(WT_SESSION_IMPL *session, FILE *fp) continue; txn = &conn->sessions[i].txn; + iso_tag = "INVALID"; switch (txn->isolation) { case WT_ISO_READ_COMMITTED: iso_tag = "WT_ISO_READ_COMMITTED"; @@ -2046,9 +2047,6 @@ __dump_txn_state(WT_SESSION_IMPL *session, FILE *fp) case WT_ISO_SNAPSHOT: iso_tag = "WT_ISO_SNAPSHOT"; break; - default: - iso_tag = "INVALID"; - break; } if (fprintf(fp, -- cgit v1.2.1 From 4d0b97a7f138f4079024c23ce9cfb70827bc133c Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Sat, 24 Dec 2016 01:47:19 +1100 Subject: WT-3093 Coverity lint. (#3216) --- src/btree/bt_slvg.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c index a8243eba17f..fea979cac6e 100644 --- a/src/btree/bt_slvg.c +++ b/src/btree/bt_slvg.c @@ -1235,7 +1235,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref) WT_PAGE *page; WT_SALVAGE_COOKIE *cookie, _cookie; uint64_t recno, skip, take; - uint32_t *entriesp, save_entries; + uint32_t save_entries; cookie = &_cookie; WT_CLEAR(*cookie); @@ -1244,11 +1244,8 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref) WT_RET(__wt_page_in(session, ref, 0)); page = ref->page; - entriesp = - page->type == WT_PAGE_COL_VAR ? 
&page->entries : &page->entries; - save_col_var = page->pg_var; - save_entries = *entriesp; + save_entries = page->entries; /* * Calculate the number of K/V entries we are going to skip, and @@ -1304,7 +1301,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref) /* Reset the page. */ page->pg_var = save_col_var; - *entriesp = save_entries; + page->entries = save_entries; ret = __wt_page_release(session, ref, 0); if (ret == 0) -- cgit v1.2.1 From 74cc96ce14d386d9f81a45cca7adddaaab5fb9d5 Mon Sep 17 00:00:00 2001 From: "Alexandra (Sasha) Fedorova" Date: Wed, 28 Dec 2016 08:07:35 -0800 Subject: WT-2898 evict dynamic workers (#3039) * WT-2898. NOT ready for review. Initial implementation of dynamically tuning the number of eviction workers. * Not ready for review. All the code is there. Still need to test/tune on different machines. * Remove debugging prints. * Style police * Spelling. * Fixup merge issue and compiler warning * Sulab and David do a review! * Fix compiler warning. Not ready for review. There is a performance regression after merging with develop. I'm on it. * Conversion to signed values in percent calculation to make sure that we always correctly compute percent difference, which can be negative, regardless of how the compiler performs sign extension. Change thresholds so we have less churn. * Fix more compiler warnings. Sorry about the churn, I don't see the same failures locally as on the autobuild even though I compile with -Werror. * Replace 1/0 with true/false * More compiler warning and style fixes. Configured with --enable-strict, so hopefully I have caught everything this time. * Minor nit/pick, init a variable * Rename free to free_thread, otherwise hides a global * Fix indentation * Fixes to the changes. Percent difference must be signed as it can be negative if the number of pages evicted per second decreased since the last period. * Added stats and log messages to reflect eviction worker tuning activity.
Fixed a bug in the code that checks the group bounds when stopping the thread. * Removed verbose message, because we already have a statistic tracking evictions per second, so this is probably redundant. * whitespace * KNF * More aggressive addition/removal of eviction workers. We used to add/remove them one at a time; it's difficult to see the effects of extra workers with such an incremental change, because eviction throughput is affected by other noise, such as what happens in the kernel and in the I/O system. Now we add and remove eviction workers in batches. * Style fixes. * Fix compiler warning. * Simplified the tuning logic. Addressed Sulabh's comments. * A tuning parameter change * Fixed a bug where we needed a random value, but were not getting it via the random number generator, so it was not random and the code did not have the right behaviour. Added stats. * Move the call to tune evict workers into __evict_pass, so we can begin the tuning earlier. * NOT READY FOR REVIEW. Changed defaults for the number of eviction workers, so I can experiment with larger values. * NOT READY FOR REVIEW. A parameter to put a cap on how many threads we are adding at a time. * Reverse the changes of the last commit. That change hurt performance. * Changed all wtperf runners that set eviction thread maximum to 30, so we could evaluate the effects of the dynamic branch. * Updated the number reserved for internal sessions to 40, since we can now create up to 30 eviction worker threads by default. * Fix spellchecker complaints * KNF * NOT READY FOR REVIEW. Revised the algorithm to settle on a good value of evict workers once we sufficiently explore the configuration space using the gradient descent approach with random adjustments. The algorithm successfully finds the best static number of workers, but performs worse. I suspect that there is an issue with how threads are removed. Suspect a bug in thread support code. Have not chased it yet. * Remove prints, add stats.
* Fix a copy-paste bug where a code line was inadvertently eliminated. * Reduce the maximum for eviction workers to 30. Prevent dereferencing a NULL pointer if we dynamically grow a thread group after we've shrunk it and freed the associated memory. * Cleaned up and simplified the code. * NOT READY FOR REVIEW. A new version of the tuning algorithm that fixes a memory issue when we try to pre-allocate a large eviction thread group. Still need to tune and clean up the code. * Clean up the code. * Get rid of s_label warnings. Remove unused code. * Fix various style errors. * Fixed the logic in figuring out the maximum value for eviction threads upon cache creation or reconfiguration, which had caused a crash in one of the tests. * Changed default max for the number of eviction threads to eight. * Fix ranges for the minimum number of eviction threads * Fix eviction thread ranges to make the csuite happy * Commit automatic changes by s_all * Review: KNF, whitespace and renamed a few things. * Fix lock usage * KNF --- bench/wtperf/runners/500m-btree-50r50u.wtperf | 2 +- bench/wtperf/runners/500m-btree-80r20u.wtperf | 2 +- bench/wtperf/runners/500m-btree-populate.wtperf | 2 +- bench/wtperf/runners/500m-btree-rdonly.wtperf | 2 +- bench/wtperf/runners/checkpoint-stress.wtperf | 2 +- bench/wtperf/runners/evict-btree-readonly.wtperf | 2 +- .../wtperf/runners/evict-btree-stress-multi.wtperf | 2 +- bench/wtperf/runners/evict-btree-stress.wtperf | 2 +- bench/wtperf/runners/evict-btree.wtperf | 2 +- bench/wtperf/runners/evict-lsm-readonly.wtperf | 2 +- bench/wtperf/runners/evict-lsm.wtperf | 2 +- bench/wtperf/runners/log.wtperf | 2 +- .../wtperf/runners/mongodb-secondary-apply.wtperf | 2 +- .../runners/multi-btree-read-heavy-stress.wtperf | 2 +- bench/wtperf/runners/multi-btree-stress.wtperf | 2 +- .../runners/multi-btree-zipfian-populate.wtperf | 2 +- .../runners/multi-btree-zipfian-workload.wtperf | 2 +- bench/wtperf/stress/btree-split-stress.wtperf | 2 +- dist/api_data.py | 2 
+- dist/stat_data.py | 4 + src/config/config_def.c | 10 +- src/conn/conn_cache.c | 4 +- src/evict/evict_lru.c | 204 ++++++++++- src/include/connection.h | 14 +- src/include/extern.h | 1 + src/include/stat.h | 4 + src/include/wiredtiger.in | 384 +++++++++++---------- src/support/stat.c | 16 + src/support/thread_group.c | 55 ++- tools/wtstats/stat_data.py | 2 + 30 files changed, 497 insertions(+), 239 deletions(-) diff --git a/bench/wtperf/runners/500m-btree-50r50u.wtperf b/bench/wtperf/runners/500m-btree-50r50u.wtperf index 536127f0dd8..4d2a70f1107 100644 --- a/bench/wtperf/runners/500m-btree-50r50u.wtperf +++ b/bench/wtperf/runners/500m-btree-50r50u.wtperf @@ -5,7 +5,7 @@ # # Set cache to half of memory of AWS perf instance. Enable logging and # checkpoints. Collect wiredtiger stats for ftdc. -conn_config="cache_size=16G,checkpoint=(wait=60,log_size=2GB),session_max=20000,log=(enabled),statistics=(fast),statistics_log=(wait=30,json),eviction=(threads_max=4)" +conn_config="cache_size=16G,checkpoint=(wait=60,log_size=2GB),session_max=20000,log=(enabled),statistics=(fast),statistics_log=(wait=30,json),eviction=(threads_max=8)" create=false compression="snappy" sess_config="isolation=snapshot" diff --git a/bench/wtperf/runners/500m-btree-80r20u.wtperf b/bench/wtperf/runners/500m-btree-80r20u.wtperf index d6218c44af0..6645df835df 100644 --- a/bench/wtperf/runners/500m-btree-80r20u.wtperf +++ b/bench/wtperf/runners/500m-btree-80r20u.wtperf @@ -5,7 +5,7 @@ # # Set cache to half of memory of AWS perf instance. Enable logging and # checkpoints. Collect wiredtiger stats for ftdc. 
-conn_config="cache_size=16G,checkpoint=(wait=60,log_size=2GB),session_max=20000,log=(enabled),statistics=(fast),statistics_log=(wait=30,json),eviction=(threads_max=4)" +conn_config="cache_size=16G,checkpoint=(wait=60,log_size=2GB),session_max=20000,log=(enabled),statistics=(fast),statistics_log=(wait=30,json),eviction=(threads_max=8)" create=false compression="snappy" # close_conn as false allows this test to close/finish faster, but if running diff --git a/bench/wtperf/runners/500m-btree-populate.wtperf b/bench/wtperf/runners/500m-btree-populate.wtperf index f9aed094aa1..ab7b17ca683 100644 --- a/bench/wtperf/runners/500m-btree-populate.wtperf +++ b/bench/wtperf/runners/500m-btree-populate.wtperf @@ -9,7 +9,7 @@ # # This generates about 80 Gb of uncompressed data. But it should compress # well and be small on disk. -conn_config="cache_size=16G,checkpoint=(wait=60,log_size=2GB),session_max=20000,log=(enabled),statistics=(fast),statistics_log=(wait=30,json),eviction=(threads_max=4)" +conn_config="cache_size=16G,checkpoint=(wait=60,log_size=2GB),session_max=20000,log=(enabled),statistics=(fast),statistics_log=(wait=30,json),eviction=(threads_max=8)" compact=true compression="snappy" sess_config="isolation=snapshot" diff --git a/bench/wtperf/runners/500m-btree-rdonly.wtperf b/bench/wtperf/runners/500m-btree-rdonly.wtperf index 2c9540ff589..e8958d20e2c 100644 --- a/bench/wtperf/runners/500m-btree-rdonly.wtperf +++ b/bench/wtperf/runners/500m-btree-rdonly.wtperf @@ -5,7 +5,7 @@ # # Set cache to half of memory of AWS perf instance. Enable logging and # checkpoints. Collect wiredtiger stats for ftdc. 
-conn_config="cache_size=16G,checkpoint=(wait=60,log_size=2GB),session_max=20000,log=(enabled),statistics=(fast),statistics_log=(wait=30,json),eviction=(threads_max=4)" +conn_config="cache_size=16G,checkpoint=(wait=60,log_size=2GB),session_max=20000,log=(enabled),statistics=(fast),statistics_log=(wait=30,json),eviction=(threads_max=8)" create=false compression="snappy" sess_config="isolation=snapshot" diff --git a/bench/wtperf/runners/checkpoint-stress.wtperf b/bench/wtperf/runners/checkpoint-stress.wtperf index bbd3a3ba5ed..5daa276e622 100644 --- a/bench/wtperf/runners/checkpoint-stress.wtperf +++ b/bench/wtperf/runners/checkpoint-stress.wtperf @@ -1,6 +1,6 @@ # A stress configuration to create long running checkpoints while doing a lot # of updates. -conn_config="cache_size=16GB,eviction=(threads_max=4),log=(enabled=false)" +conn_config="cache_size=16GB,eviction=(threads_max=8),log=(enabled=false)" table_config="leaf_page_max=32k,internal_page_max=16k,allocation_size=4k,split_pct=90,type=file" # Enough data to fill the cache. 
150 million 1k records results in two ~11GB # tables diff --git a/bench/wtperf/runners/evict-btree-readonly.wtperf b/bench/wtperf/runners/evict-btree-readonly.wtperf index 25599fadd8d..972bc371f2d 100644 --- a/bench/wtperf/runners/evict-btree-readonly.wtperf +++ b/bench/wtperf/runners/evict-btree-readonly.wtperf @@ -1,5 +1,5 @@ # wtperf options file: evict btree configuration -conn_config="cache_size=50M,eviction=(threads_max=4),mmap=false" +conn_config="cache_size=50M,eviction=(threads_max=8),mmap=false" table_config="type=file" icount=10000000 report_interval=5 diff --git a/bench/wtperf/runners/evict-btree-stress-multi.wtperf b/bench/wtperf/runners/evict-btree-stress-multi.wtperf index a5a29f66fa0..5a2cad6d78e 100644 --- a/bench/wtperf/runners/evict-btree-stress-multi.wtperf +++ b/bench/wtperf/runners/evict-btree-stress-multi.wtperf @@ -1,4 +1,4 @@ -conn_config="cache_size=1G,eviction=(threads_max=4),session_max=2000" +conn_config="cache_size=1G,eviction=(threads_max=8),session_max=2000" table_config="type=file" table_count=100 close_conn=false diff --git a/bench/wtperf/runners/evict-btree-stress.wtperf b/bench/wtperf/runners/evict-btree-stress.wtperf index 740fb88c050..96e3f01b325 100644 --- a/bench/wtperf/runners/evict-btree-stress.wtperf +++ b/bench/wtperf/runners/evict-btree-stress.wtperf @@ -1,5 +1,5 @@ # wtperf options file: evict btree configuration -conn_config="cache_size=50M,eviction=(threads_max=4)" +conn_config="cache_size=50M,eviction=(threads_max=8)" table_config="type=file" icount=10000000 report_interval=5 diff --git a/bench/wtperf/runners/evict-btree.wtperf b/bench/wtperf/runners/evict-btree.wtperf index e7d967e5c63..3810e6a8294 100644 --- a/bench/wtperf/runners/evict-btree.wtperf +++ b/bench/wtperf/runners/evict-btree.wtperf @@ -1,5 +1,5 @@ # wtperf options file: evict btree configuration -conn_config="cache_size=50M,eviction=(threads_max=4)" +conn_config="cache_size=50M,eviction=(threads_max=8)" table_config="type=file" icount=10000000 
report_interval=5 diff --git a/bench/wtperf/runners/evict-lsm-readonly.wtperf b/bench/wtperf/runners/evict-lsm-readonly.wtperf index 661b8e21924..470dca695dd 100644 --- a/bench/wtperf/runners/evict-lsm-readonly.wtperf +++ b/bench/wtperf/runners/evict-lsm-readonly.wtperf @@ -1,5 +1,5 @@ # wtperf options file: evict lsm configuration -conn_config="cache_size=50M,lsm_manager=(worker_thread_max=6),eviction=(threads_max=4)" +conn_config="cache_size=50M,lsm_manager=(worker_thread_max=6),eviction=(threads_max=8)" table_config="type=lsm,lsm=(chunk_size=2M),os_cache_dirty_max=16MB" compact=true icount=10000000 diff --git a/bench/wtperf/runners/evict-lsm.wtperf b/bench/wtperf/runners/evict-lsm.wtperf index b872d429046..a0f2a78d013 100644 --- a/bench/wtperf/runners/evict-lsm.wtperf +++ b/bench/wtperf/runners/evict-lsm.wtperf @@ -1,5 +1,5 @@ # wtperf options file: evict lsm configuration -conn_config="cache_size=50M,lsm_manager=(worker_thread_max=6),eviction=(threads_max=4)" +conn_config="cache_size=50M,lsm_manager=(worker_thread_max=6),eviction=(threads_max=8)" table_config="type=lsm,lsm=(chunk_size=2M),os_cache_dirty_max=16MB" compact=true icount=10000000 diff --git a/bench/wtperf/runners/log.wtperf b/bench/wtperf/runners/log.wtperf index 6cf50dfb5a5..4379ba22373 100644 --- a/bench/wtperf/runners/log.wtperf +++ b/bench/wtperf/runners/log.wtperf @@ -16,7 +16,7 @@ # - Config + "-C "checkpoint=(wait=0)": no checkpoints # - Config + "-C "log=(enabled,prealloc=false,file_max=1M)": no pre-allocation # -conn_config="cache_size=5G,log=(enabled=true),checkpoint=(log_size=500M),eviction=(threads_max=4)" +conn_config="cache_size=5G,log=(enabled=true),checkpoint=(log_size=500M),eviction=(threads_max=8)" table_config="type=file" icount=1000000 report_interval=5 diff --git a/bench/wtperf/runners/mongodb-secondary-apply.wtperf b/bench/wtperf/runners/mongodb-secondary-apply.wtperf index f9e41184f95..58bd1a76b97 100644 --- a/bench/wtperf/runners/mongodb-secondary-apply.wtperf +++ 
b/bench/wtperf/runners/mongodb-secondary-apply.wtperf @@ -1,5 +1,5 @@ # Simulate the MongoDB oplog apply threads on a secondary. -conn_config="cache_size=10GB,session_max=1000,eviction=(threads_min=4,threads_max=4),log=(enabled=false),transaction_sync=(enabled=false),checkpoint_sync=true,checkpoint=(wait=60),statistics=(fast),statistics_log=(json,wait=1)" +conn_config="cache_size=10GB,session_max=1000,eviction=(threads_min=4,threads_max=8),log=(enabled=false),transaction_sync=(enabled=false),checkpoint_sync=true,checkpoint=(wait=60),statistics=(fast),statistics_log=(json,wait=1)" table_config="allocation_size=4k,memory_page_max=5MB,prefix_compression=false,split_pct=75,leaf_page_max=32k,internal_page_max=16k,type=file" # Spread the workload out over several tables. table_count=4 diff --git a/bench/wtperf/runners/multi-btree-read-heavy-stress.wtperf b/bench/wtperf/runners/multi-btree-read-heavy-stress.wtperf index d7b27f8fda4..f07e6c80b39 100644 --- a/bench/wtperf/runners/multi-btree-read-heavy-stress.wtperf +++ b/bench/wtperf/runners/multi-btree-read-heavy-stress.wtperf @@ -2,7 +2,7 @@ # up by dividing the workload across a lot of threads. This needs to be # tuned to the particular machine so the workload is close to capacity in the # steady state, but not overwhelming. -conn_config="cache_size=20GB,session_max=1000,eviction=(threads_min=4,threads_max=4),log=(enabled=false),transaction_sync=(enabled=false),checkpoint_sync=true,checkpoint=(wait=60),statistics=(fast),statistics_log=(json,wait=1)" +conn_config="cache_size=20GB,session_max=1000,eviction=(threads_min=4,threads_max=8),log=(enabled=false),transaction_sync=(enabled=false),checkpoint_sync=true,checkpoint=(wait=60),statistics=(fast),statistics_log=(json,wait=1)" table_config="allocation_size=4k,memory_page_max=10MB,prefix_compression=false,split_pct=90,leaf_page_max=32k,internal_page_max=16k,type=file" # Divide original icount by database_count. 
table_count=8 diff --git a/bench/wtperf/runners/multi-btree-stress.wtperf b/bench/wtperf/runners/multi-btree-stress.wtperf index b10b08f6035..bee1f431043 100644 --- a/bench/wtperf/runners/multi-btree-stress.wtperf +++ b/bench/wtperf/runners/multi-btree-stress.wtperf @@ -1,7 +1,7 @@ # wtperf options file: multi-database configuration attempting to # trigger slow operations by overloading CPU and disk. # References Jira WT-2131 -conn_config="cache_size=2GB,eviction=(threads_min=2,threads_max=2),log=(enabled=false),direct_io=(data,checkpoint),buffer_alignment=4096,checkpoint_sync=true,checkpoint=(wait=60)" +conn_config="cache_size=2GB,eviction=(threads_min=2,threads_max=8),log=(enabled=false),direct_io=(data,checkpoint),buffer_alignment=4096,checkpoint_sync=true,checkpoint=(wait=60)" table_config="allocation_size=4k,prefix_compression=false,split_pct=75,leaf_page_max=4k,internal_page_max=16k,leaf_item_max=1433,internal_item_max=3100,type=file" # Divide original icount by database_count. database_count=5 diff --git a/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf b/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf index ddd9c055eac..1fdba049779 100644 --- a/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf +++ b/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf @@ -1,5 +1,5 @@ # Create a set of tables with uneven distribution of data -conn_config="cache_size=1G,eviction=(threads_max=4),file_manager=(close_idle_time=100000),checkpoint=(wait=60,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000" +conn_config="cache_size=1G,eviction=(threads_max=8),file_manager=(close_idle_time=100000),checkpoint=(wait=60,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000" table_config="type=file" table_count=100 icount=0 diff --git a/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf b/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf index 380350c88c8..dfb3306a7a5 100644 --- 
a/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf +++ b/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf @@ -1,5 +1,5 @@ # Read from a set of tables with uneven distribution of data -conn_config="cache_size=1G,eviction=(threads_max=4),file_manager=(close_idle_time=100000),checkpoint=(wait=60,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000" +conn_config="cache_size=1G,eviction=(threads_max=8),file_manager=(close_idle_time=100000),checkpoint=(wait=60,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000" table_config="type=file" table_count=100 icount=0 diff --git a/bench/wtperf/stress/btree-split-stress.wtperf b/bench/wtperf/stress/btree-split-stress.wtperf index deb8c70d12f..86bb288fc6d 100644 --- a/bench/wtperf/stress/btree-split-stress.wtperf +++ b/bench/wtperf/stress/btree-split-stress.wtperf @@ -1,4 +1,4 @@ -conn_config="cache_size=2GB,statistics=[fast,clear],statistics_log=(wait=10),eviction=(threads_max=4,threads_min=4)" +conn_config="cache_size=2GB,statistics=[fast,clear],statistics_log=(wait=10),eviction=(threads_max=8,threads_min=4)" table_config="type=file,leaf_page_max=8k,internal_page_max=8k,memory_page_max=2MB,split_deepen_min_child=250" icount=200000 report_interval=5 diff --git a/dist/api_data.py b/dist/api_data.py index 98f9b5a230a..04071a84332 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -406,7 +406,7 @@ connection_runtime_config = [ Config('eviction', '', r''' eviction configuration options''', type='category', subconfig=[ - Config('threads_max', '1', r''' + Config('threads_max', '8', r''' maximum number of threads WiredTiger will start to help evict pages from cache. The number of threads started will vary depending on the current eviction load. 
Each eviction worker diff --git a/dist/stat_data.py b/dist/stat_data.py index c481382dafc..0af5d6d017e 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -193,6 +193,7 @@ connection_stats = [ CacheStat('cache_bytes_other', 'bytes not belonging to page images in the cache', 'no_clear,no_scale,size'), CacheStat('cache_bytes_read', 'bytes read into cache', 'size'), CacheStat('cache_bytes_write', 'bytes written from cache', 'size'), + CacheStat('cache_eviction_active_workers', 'eviction worker thread active', 'no_clear'), CacheStat('cache_eviction_aggressive_set', 'eviction currently operating in aggressive mode', 'no_clear,no_scale'), CacheStat('cache_eviction_app', 'pages evicted by application threads'), CacheStat('cache_eviction_app_dirty', 'modified pages evicted by application threads'), @@ -222,12 +223,15 @@ connection_stats = [ CacheStat('cache_eviction_slow', 'eviction server unable to reach eviction goal'), CacheStat('cache_eviction_split_internal', 'internal pages split during eviction'), CacheStat('cache_eviction_split_leaf', 'leaf pages split during eviction'), + CacheStat('cache_eviction_stable_state_workers', 'eviction worker thread stable number', 'no_clear'), CacheStat('cache_eviction_state', 'eviction state', 'no_clear,no_scale'), CacheStat('cache_eviction_walk', 'pages walked for eviction'), CacheStat('cache_eviction_walks_abandoned', 'eviction walks abandoned'), CacheStat('cache_eviction_walks_active', 'files with active eviction walks', 'no_clear,no_scale'), CacheStat('cache_eviction_walks_started', 'files with new eviction walks started'), CacheStat('cache_eviction_worker_evicting', 'eviction worker thread evicting pages'), + CacheStat('cache_eviction_worker_created', 'eviction worker thread created'), + CacheStat('cache_eviction_worker_removed', 'eviction worker thread removed'), CacheStat('cache_hazard_checks', 'hazard pointer check calls'), CacheStat('cache_hazard_max', 'hazard pointer maximum array length', 'max_aggregate,no_scale'), 
CacheStat('cache_hazard_walks', 'hazard pointer check entries walked'), diff --git a/src/config/config_def.c b/src/config/config_def.c index e4fd7937a40..83c1436eade 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -1050,7 +1050,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { { "WT_CONNECTION.reconfigure", "async=(enabled=false,ops_max=1024,threads=2),cache_overhead=8," "cache_size=100MB,checkpoint=(log_size=0,wait=0),error_prefix=," - "eviction=(threads_max=1,threads_min=1)," + "eviction=(threads_max=8,threads_min=1)," "eviction_checkpoint_target=5,eviction_dirty_target=5," "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" ",file_manager=(close_handle_minimum=250,close_idle_time=30," @@ -1261,7 +1261,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { ",builtin_extension_config=,cache_overhead=8,cache_size=100MB," "checkpoint=(log_size=0,wait=0),checkpoint_sync=true," "config_base=true,create=false,direct_io=,encryption=(keyid=," - "name=,secretkey=),error_prefix=,eviction=(threads_max=1," + "name=,secretkey=),error_prefix=,eviction=(threads_max=8," "threads_min=1),eviction_checkpoint_target=5," "eviction_dirty_target=5,eviction_dirty_trigger=20," "eviction_target=80,eviction_trigger=95,exclusive=false," @@ -1285,7 +1285,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { ",builtin_extension_config=,cache_overhead=8,cache_size=100MB," "checkpoint=(log_size=0,wait=0),checkpoint_sync=true," "config_base=true,create=false,direct_io=,encryption=(keyid=," - "name=,secretkey=),error_prefix=,eviction=(threads_max=1," + "name=,secretkey=),error_prefix=,eviction=(threads_max=8," "threads_min=1),eviction_checkpoint_target=5," "eviction_dirty_target=5,eviction_dirty_trigger=20," "eviction_target=80,eviction_trigger=95,exclusive=false," @@ -1309,7 +1309,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { ",builtin_extension_config=,cache_overhead=8,cache_size=100MB," 
"checkpoint=(log_size=0,wait=0),checkpoint_sync=true,direct_io=," "encryption=(keyid=,name=,secretkey=),error_prefix=," - "eviction=(threads_max=1,threads_min=1)," + "eviction=(threads_max=8,threads_min=1)," "eviction_checkpoint_target=5,eviction_dirty_target=5," "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" ",extensions=,file_extend=,file_manager=(close_handle_minimum=250" @@ -1330,7 +1330,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { ",builtin_extension_config=,cache_overhead=8,cache_size=100MB," "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,direct_io=," "encryption=(keyid=,name=,secretkey=),error_prefix=," - "eviction=(threads_max=1,threads_min=1)," + "eviction=(threads_max=8,threads_min=1)," "eviction_checkpoint_target=5,eviction_dirty_target=5," "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95" ",extensions=,file_extend=,file_manager=(close_handle_minimum=250" diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c index fe5f94ea03d..9b07b46abcd 100644 --- a/src/conn/conn_cache.c +++ b/src/conn/conn_cache.c @@ -143,7 +143,9 @@ __wt_cache_config(WT_SESSION_IMPL *session, bool reconfigure, const char *cfg[]) if (reconfigure) WT_RET(__wt_thread_group_resize( session, &conn->evict_threads, - conn->evict_threads_min, conn->evict_threads_max, + conn->evict_threads_min, + WT_MAX(conn->evict_threads_min, + WT_MIN(conn->evict_threads_max, EVICT_GROUP_INCR)), WT_THREAD_CAN_WAIT | WT_THREAD_PANIC_FAIL)); return (0); diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index b4cb2cc229a..485fd0e6d40 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -15,6 +15,7 @@ static int __evict_lru_walk(WT_SESSION_IMPL *); static int __evict_page(WT_SESSION_IMPL *, bool); static int __evict_pass(WT_SESSION_IMPL *); static int __evict_server(WT_SESSION_IMPL *, bool *); +static int __evict_tune_workers(WT_SESSION_IMPL *session); static int __evict_walk(WT_SESSION_IMPL *, WT_EVICT_QUEUE *); static int 
__evict_walk_file( WT_SESSION_IMPL *, WT_EVICT_QUEUE *, u_int, u_int *); @@ -389,10 +390,19 @@ __wt_evict_create(WT_SESSION_IMPL *session) /* Set first, the thread might run before we finish up. */ F_SET(conn, WT_CONN_EVICTION_RUN); - /* Create the eviction thread group */ + /* + * Create the eviction thread group. + * We don't set the group size to the maximum allowed sessions, + * because this may have adverse memory effects. Instead, + * we set the group's maximum to a small value. The code + * that tunes the number of workers will increase the + * maximum if necessary. + */ WT_RET(__wt_thread_group_create(session, &conn->evict_threads, "eviction-server", conn->evict_threads_min, - conn->evict_threads_max, WT_THREAD_CAN_WAIT | WT_THREAD_PANIC_FAIL, + WT_MAX(conn->evict_threads_min, + WT_MIN(conn->evict_threads_max, EVICT_GROUP_INCR)), + WT_THREAD_CAN_WAIT | WT_THREAD_PANIC_FAIL, __wt_evict_thread_run)); /* @@ -548,6 +558,8 @@ __evict_pass(WT_SESSION_IMPL *session) if (loop == 0) prev = now; + if (conn->evict_threads.threads[0]->session == session) + __evict_tune_workers(session); /* * Increment the shared read generation. Do this occasionally * even if eviction is not currently required, so that pages @@ -573,14 +585,6 @@ __evict_pass(WT_SESSION_IMPL *session) if (!__evict_update_work(session)) break; - /* - * Try to start a new thread if we have capacity and haven't - * reached the eviction targets. 
- */ - if (F_ISSET(cache, WT_CACHE_EVICT_ALL)) - WT_RET(__wt_thread_group_start_one( - session, &conn->evict_threads, false)); - __wt_verbose(session, WT_VERB_EVICTSERVER, "Eviction pass with: Max: %" PRIu64 " In use: %" PRIu64 " Dirty: %" PRIu64, @@ -844,6 +848,182 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) __wt_spin_unlock(session, &cache->evict_walk_lock); } +#define EVICT_TUNE_BATCH 1 /* Max workers to add each period */ +#define EVICT_TUNE_DATAPT_MIN 3 /* Data points needed before deciding + if we should keep adding workers or + settle on an earlier value. */ +#define EVICT_TUNE_PERIOD 2 /* Tune period in seconds */ + +/* + * __evict_tune_workers -- + * Find the right number of eviction workers. Gradually ramp up the number of + * workers increasing the number in batches indicated by the setting above. + * Store the number of workers that gave us the best throughput so far and + * the number of data points we have tried. + * + * Every once in a while when we have the minimum number of data points + * we check whether the eviction throughput achieved with the current number + * of workers is the best we have seen so far. If so, we will keep increasing + * the number of workers. If not, we are past the inflection point on the + * eviction throughput curve. In that case, we will set the number of workers + * to the best observed so far and settle into a stable state.
+ */ +static int +__evict_tune_workers(WT_SESSION_IMPL *session) +{ + struct timespec current_time; + WT_CACHE *cache; + WT_CONNECTION_IMPL *conn; + uint64_t cur_threads, delta_msec, delta_pages, i, target_threads; + uint64_t pgs_evicted_cur, pgs_evicted_persec_cur; + uint32_t new_max, thread_surplus; + + conn = S2C(session); + cache = conn->cache; + + WT_ASSERT(session, conn->evict_threads.threads[0]->session == session); + pgs_evicted_persec_cur = 0; + + if (conn->evict_tune_stable) + return (0); + + __wt_epoch(session, &current_time); + + /* + * Every EVICT_TUNE_PERIOD seconds record the number of + * pages evicted per second observed in the previous period. + */ + if (WT_TIMEDIFF_SEC( + current_time, conn->evict_tune_last_time) < EVICT_TUNE_PERIOD) + return (0); + + pgs_evicted_cur = cache->pages_evict; + + /* + * If we have recorded the number of pages evicted at the end of + * the previous measurement interval, we can compute the eviction + * rate in evicted pages per second achieved during the current + * measurement interval. + * Otherwise, we just record the number of evicted pages and return. + */ + if (conn->evict_tune_pgs_last == 0) + goto out; + + delta_msec = WT_TIMEDIFF_MS(current_time, conn->evict_tune_last_time); + delta_pages = pgs_evicted_cur - conn->evict_tune_pgs_last; + pgs_evicted_persec_cur = (delta_pages * WT_THOUSAND) / delta_msec; + conn->evict_tune_num_points++; + + /* Keep track of the maximum eviction throughput seen and the number + * of workers corresponding to that throughput. + */ + if (pgs_evicted_persec_cur > conn->evict_tune_pg_sec_max) { + conn->evict_tune_pg_sec_max = pgs_evicted_persec_cur; + conn->evict_tune_workers_best = + conn->evict_threads.current_threads; + } + + /* + * Compare the current number of data points with the number + * needed variable.
If they are equal, we will check whether + * we are still going up on the performance curve, in which + * case we will continue increasing the number of workers, or + * we are past the inflection point on the curve, in which case + * we will go back to the best observed number of workers and + * settle into a stable state. + */ + if (conn->evict_tune_num_points >= conn->evict_tune_datapts_needed) { + if ((conn->evict_tune_workers_best == + conn->evict_threads.current_threads) && + (conn->evict_threads.current_threads < + conn->evict_threads_max)) { + /* + * Keep adding workers. We will check again + * at the next check point. + */ + conn->evict_tune_datapts_needed += + WT_MIN(EVICT_TUNE_DATAPT_MIN, + (conn->evict_threads_max + - conn->evict_threads.current_threads)/ + EVICT_TUNE_BATCH); + } else { + /* + * We are past the inflection point. Choose the + * best number of eviction workers observed and + * settle into a stable state. + */ + thread_surplus = + conn->evict_threads.current_threads - + conn->evict_tune_workers_best; + + for (i = 0; i < thread_surplus; i++) { + WT_RET(__wt_thread_group_stop_one(session, + &conn->evict_threads, true)); + WT_STAT_CONN_INCR(session, + cache_eviction_worker_removed); + } + WT_STAT_CONN_SET(session, + cache_eviction_stable_state_workers, + conn->evict_tune_workers_best); + conn->evict_tune_stable = true; + WT_STAT_CONN_SET(session, cache_eviction_active_workers, + conn->evict_threads.current_threads); + return (0); + } + } + + /* + * If we have not added any worker threads in the past, we set the + * number needed equal to the number of data points that we must + * accumulate before deciding if we should keep adding workers or settle + * on a previously tried value of workers. 
+ */ + if (conn->evict_tune_last_action_time.tv_sec == 0) + conn->evict_tune_datapts_needed = WT_MIN(EVICT_TUNE_DATAPT_MIN, + (conn->evict_threads_max - + conn->evict_threads.current_threads) / EVICT_TUNE_BATCH); + + if (F_ISSET(cache, WT_CACHE_EVICT_ALL)) { + cur_threads = conn->evict_threads.current_threads; + target_threads = WT_MIN(cur_threads + EVICT_TUNE_BATCH, + conn->evict_threads_max); + /* + * Resize the group to allow for an additional batch of threads. + * We resize the group in increments of a few sessions. + * Allocating the group to accommodate the maximum number of + * workers has adverse effects on performance due to memory + * effects, so we gradually ramp up the allocation. + */ + if (conn->evict_threads.max < target_threads) { + new_max = WT_MIN(conn->evict_threads.max + + EVICT_GROUP_INCR, conn->evict_threads_max); + + WT_RET(__wt_thread_group_resize( + session, &conn->evict_threads, + conn->evict_threads_min, new_max, + WT_THREAD_CAN_WAIT | WT_THREAD_PANIC_FAIL)); + } + + /* Now actually start the new threads. */ + for (i = 0; i < (target_threads - cur_threads); ++i) { + WT_RET(__wt_thread_group_start_one(session, + &conn->evict_threads, false)); + WT_STAT_CONN_INCR(session, + cache_eviction_worker_created); + __wt_verbose(session, WT_VERB_EVICTSERVER, + "added worker thread"); + } + conn->evict_tune_last_action_time = current_time; + } + + WT_STAT_CONN_SET(session, cache_eviction_active_workers, + conn->evict_threads.current_threads); + +out: conn->evict_tune_last_time = current_time; + conn->evict_tune_pgs_last = pgs_evicted_cur; + return (0); +} + /* * __evict_lru_pages -- * Get pages from the LRU queue to evict. @@ -1282,8 +1462,8 @@ __evict_push_candidate(WT_SESSION_IMPL *session, * Get a few page eviction candidates from a single underlying file. 
*/ static int -__evict_walk_file(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue, - u_int max_entries, u_int *slotp) +__evict_walk_file(WT_SESSION_IMPL *session, + WT_EVICT_QUEUE *queue, u_int max_entries, u_int *slotp) { WT_BTREE *btree; WT_CACHE *cache; diff --git a/src/include/connection.h b/src/include/connection.h index 6818633d816..665275440cf 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -301,6 +301,16 @@ struct __wt_connection_impl { uint32_t evict_threads_max;/* Max eviction threads */ uint32_t evict_threads_min;/* Min eviction threads */ +#define EVICT_GROUP_INCR 4 /* Evict group size increased in batches */ + uint32_t evict_tune_datapts_needed;/* Data needed to tune */ + struct timespec evict_tune_last_action_time;/* Time of last action */ + struct timespec evict_tune_last_time; /* Time of last check */ + uint32_t evict_tune_num_points; /* Number of values tried */ + uint64_t evict_tune_pgs_last; /* Number of pages evicted */ + uint64_t evict_tune_pg_sec_max; /* Max throughput encountered */ + bool evict_tune_stable; /* Are we stable? 
*/ + uint32_t evict_tune_workers_best;/* Best performing value */ + #define WT_STATLOG_FILENAME "WiredTigerStat.%d.%H" WT_SESSION_IMPL *stat_session; /* Statistics log session */ wt_thread_t stat_tid; /* Statistics log thread */ @@ -326,11 +336,11 @@ struct __wt_connection_impl { bool log_tid_set; /* Log server thread set */ WT_CONDVAR *log_file_cond; /* Log file thread wait mutex */ WT_SESSION_IMPL *log_file_session;/* Log file thread session */ - wt_thread_t log_file_tid; /* Log file thread thread */ + wt_thread_t log_file_tid; /* Log file thread */ bool log_file_tid_set;/* Log file thread set */ WT_CONDVAR *log_wrlsn_cond;/* Log write lsn thread wait mutex */ WT_SESSION_IMPL *log_wrlsn_session;/* Log write lsn thread session */ - wt_thread_t log_wrlsn_tid; /* Log write lsn thread thread */ + wt_thread_t log_wrlsn_tid; /* Log write lsn thread */ bool log_wrlsn_tid_set;/* Log write lsn thread set */ WT_LOG *log; /* Logging structure */ WT_COMPRESSOR *log_compressor;/* Logging compressor */ diff --git a/src/include/extern.h b/src/include/extern.h index bcad3580e25..566eb386c29 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -728,6 +728,7 @@ extern int __wt_thread_group_resize( WT_SESSION_IMPL *session, WT_THREAD_GROUP * extern int __wt_thread_group_create( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, const char *name, uint32_t min, uint32_t max, uint32_t flags, int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_thread_group_start_one( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool wait) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_thread_group_stop_one( 
WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool wait) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_txn_get_snapshot(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/stat.h b/src/include/stat.h index 3dcdf68b8d5..fd3e3290d95 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -310,7 +310,11 @@ struct __wt_connection_stats { int64_t cache_eviction_slow; int64_t cache_eviction_state; int64_t cache_eviction_walks_abandoned; + int64_t cache_eviction_active_workers; + int64_t cache_eviction_worker_created; int64_t cache_eviction_worker_evicting; + int64_t cache_eviction_worker_removed; + int64_t cache_eviction_stable_state_workers; int64_t cache_eviction_force_fail; int64_t cache_eviction_walks_active; int64_t cache_eviction_walks_started; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 9ee28317bc4..7c27baa9395 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1855,7 +1855,7 @@ struct __wt_connection { * threads WiredTiger will start to help evict pages from cache. The * number of threads started will vary depending on the current eviction * load. Each eviction worker thread uses a session from the configured - * session_max., an integer between 1 and 20; default \c 1.} + * session_max., an integer between 1 and 20; default \c 8.} * @config{    threads_min, minimum number of * threads WiredTiger will start to help evict pages from cache. 
The * number of threads currently running will vary depending on the @@ -2331,7 +2331,7 @@ struct __wt_connection { * WiredTiger will start to help evict pages from cache. The number of threads * started will vary depending on the current eviction load. Each eviction * worker thread uses a session from the configured session_max., an integer - * between 1 and 20; default \c 1.} + * between 1 and 20; default \c 8.} * @config{    threads_min, * minimum number of threads WiredTiger will start to help evict pages from * cache. The number of threads currently running will vary depending on the @@ -4429,396 +4429,404 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_EVICTION_STATE 1051 /*! cache: eviction walks abandoned */ #define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1052 +/*! cache: eviction worker thread active */ +#define WT_STAT_CONN_CACHE_EVICTION_ACTIVE_WORKERS 1053 +/*! cache: eviction worker thread created */ +#define WT_STAT_CONN_CACHE_EVICTION_WORKER_CREATED 1054 /*! cache: eviction worker thread evicting pages */ -#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1053 +#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1055 +/*! cache: eviction worker thread removed */ +#define WT_STAT_CONN_CACHE_EVICTION_WORKER_REMOVED 1056 +/*! cache: eviction worker thread stable number */ +#define WT_STAT_CONN_CACHE_EVICTION_STABLE_STATE_WORKERS 1057 /*! cache: failed eviction of pages that exceeded the in-memory maximum */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1054 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1058 /*! cache: files with active eviction walks */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1055 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1059 /*! cache: files with new eviction walks started */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1056 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1060 /*! 
cache: hazard pointer blocked page eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1057 +#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1061 /*! cache: hazard pointer check calls */ -#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1058 +#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1062 /*! cache: hazard pointer check entries walked */ -#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1059 +#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1063 /*! cache: hazard pointer maximum array length */ -#define WT_STAT_CONN_CACHE_HAZARD_MAX 1060 +#define WT_STAT_CONN_CACHE_HAZARD_MAX 1064 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1061 +#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1065 /*! cache: in-memory page splits */ -#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1062 +#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1066 /*! cache: internal pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1063 +#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1067 /*! cache: internal pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1064 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1068 /*! cache: leaf pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1065 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1069 /*! cache: lookaside table insert calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1066 +#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1070 /*! cache: lookaside table remove calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1067 +#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1071 /*! cache: maximum bytes configured */ -#define WT_STAT_CONN_CACHE_BYTES_MAX 1068 +#define WT_STAT_CONN_CACHE_BYTES_MAX 1072 /*! cache: maximum page size at eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1069 +#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1073 /*! 
cache: modified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1070 +#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1074 /*! cache: modified pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1071 +#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1075 /*! cache: overflow pages read into cache */ -#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1072 +#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1076 /*! cache: overflow values cached in memory */ -#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1073 +#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1077 /*! cache: page split during eviction deepened the tree */ -#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1074 +#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1078 /*! cache: page written requiring lookaside records */ -#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1075 +#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1079 /*! cache: pages currently held in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_INUSE 1076 +#define WT_STAT_CONN_CACHE_PAGES_INUSE 1080 /*! cache: pages evicted because they exceeded the in-memory maximum */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1077 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1081 /*! cache: pages evicted because they had chains of deleted items */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1078 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1082 /*! cache: pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP 1079 +#define WT_STAT_CONN_CACHE_EVICTION_APP 1083 /*! cache: pages queued for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1080 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1084 /*! cache: pages queued for urgent eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1081 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1085 /*! 
cache: pages queued for urgent eviction during walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1082 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1086 /*! cache: pages read into cache */ -#define WT_STAT_CONN_CACHE_READ 1083 +#define WT_STAT_CONN_CACHE_READ 1087 /*! cache: pages read into cache requiring lookaside entries */ -#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1084 +#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1088 /*! cache: pages requested from the cache */ -#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1085 +#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1089 /*! cache: pages seen by eviction walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1086 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1090 /*! cache: pages selected for eviction unable to be evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1087 +#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1091 /*! cache: pages walked for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK 1088 +#define WT_STAT_CONN_CACHE_EVICTION_WALK 1092 /*! cache: pages written from cache */ -#define WT_STAT_CONN_CACHE_WRITE 1089 +#define WT_STAT_CONN_CACHE_WRITE 1093 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1090 +#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1094 /*! cache: percentage overhead */ -#define WT_STAT_CONN_CACHE_OVERHEAD 1091 +#define WT_STAT_CONN_CACHE_OVERHEAD 1095 /*! cache: tracked bytes belonging to internal pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1092 +#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1096 /*! cache: tracked bytes belonging to leaf pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_LEAF 1093 +#define WT_STAT_CONN_CACHE_BYTES_LEAF 1097 /*! cache: tracked dirty bytes in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1094 +#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1098 /*! 
cache: tracked dirty pages in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1095 +#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1099 /*! cache: unmodified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1096 +#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1100 /*! connection: auto adjusting condition resets */ -#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1097 +#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1101 /*! connection: auto adjusting condition wait calls */ -#define WT_STAT_CONN_COND_AUTO_WAIT 1098 +#define WT_STAT_CONN_COND_AUTO_WAIT 1102 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1099 +#define WT_STAT_CONN_FILE_OPEN 1103 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1100 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1104 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1101 +#define WT_STAT_CONN_MEMORY_FREE 1105 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1102 +#define WT_STAT_CONN_MEMORY_GROW 1106 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1103 +#define WT_STAT_CONN_COND_WAIT 1107 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1104 +#define WT_STAT_CONN_RWLOCK_READ 1108 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1105 +#define WT_STAT_CONN_RWLOCK_WRITE 1109 /*! connection: total fsync I/Os */ -#define WT_STAT_CONN_FSYNC_IO 1106 +#define WT_STAT_CONN_FSYNC_IO 1110 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1107 +#define WT_STAT_CONN_READ_IO 1111 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1108 +#define WT_STAT_CONN_WRITE_IO 1112 /*! cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1109 +#define WT_STAT_CONN_CURSOR_CREATE 1113 /*! cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1110 +#define WT_STAT_CONN_CURSOR_INSERT 1114 /*! 
cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1111 +#define WT_STAT_CONN_CURSOR_NEXT 1115 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1112 +#define WT_STAT_CONN_CURSOR_PREV 1116 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1113 +#define WT_STAT_CONN_CURSOR_REMOVE 1117 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1114 +#define WT_STAT_CONN_CURSOR_RESET 1118 /*! cursor: cursor restarted searches */ -#define WT_STAT_CONN_CURSOR_RESTART 1115 +#define WT_STAT_CONN_CURSOR_RESTART 1119 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1116 +#define WT_STAT_CONN_CURSOR_SEARCH 1120 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1117 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1121 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1118 +#define WT_STAT_CONN_CURSOR_UPDATE 1122 /*! cursor: truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1119 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1123 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1120 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1124 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1121 +#define WT_STAT_CONN_DH_SWEEP_REF 1125 /*! data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1122 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1126 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1123 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1127 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1124 +#define WT_STAT_CONN_DH_SWEEP_TOD 1128 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1125 +#define WT_STAT_CONN_DH_SWEEPS 1129 /*! 
data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1126 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1130 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1127 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1131 /*! lock: checkpoint lock acquisitions */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1128 +#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1132 /*! lock: checkpoint lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1129 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1133 /*! lock: checkpoint lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1130 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1134 /*! lock: handle-list lock acquisitions */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_COUNT 1131 +#define WT_STAT_CONN_LOCK_HANDLE_LIST_COUNT 1135 /*! lock: handle-list lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_APPLICATION 1132 +#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_APPLICATION 1136 /*! lock: handle-list lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_INTERNAL 1133 +#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_INTERNAL 1137 /*! lock: metadata lock acquisitions */ -#define WT_STAT_CONN_LOCK_METADATA_COUNT 1134 +#define WT_STAT_CONN_LOCK_METADATA_COUNT 1138 /*! lock: metadata lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1135 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1139 /*! lock: metadata lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1136 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1140 /*! lock: schema lock acquisitions */ -#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1137 +#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1141 /*! 
lock: schema lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1138 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1142 /*! lock: schema lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1139 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1143 /*! lock: table lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_COUNT 1140 +#define WT_STAT_CONN_LOCK_TABLE_COUNT 1144 /*! * lock: table lock application thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1141 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1145 /*! * lock: table lock internal thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1142 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1146 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1143 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1147 /*! log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1144 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1148 /*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1145 +#define WT_STAT_CONN_LOG_SLOT_RACES 1149 /*! log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1146 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1150 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1147 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1151 /*! log: consolidated slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1148 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1152 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1149 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1153 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1150 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1154 /*! 
log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1151 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1155 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1152 +#define WT_STAT_CONN_LOG_FLUSH 1156 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1153 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1157 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1154 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1158 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1155 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1159 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1156 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1160 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1157 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1161 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1158 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1162 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1159 +#define WT_STAT_CONN_LOG_SCANS 1163 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1160 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1164 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1161 +#define WT_STAT_CONN_LOG_WRITE_LSN 1165 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1162 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1166 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1163 +#define WT_STAT_CONN_LOG_SYNC 1167 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1164 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1168 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1165 +#define WT_STAT_CONN_LOG_SYNC_DIR 1169 /*! 
log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1166 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1170 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1167 +#define WT_STAT_CONN_LOG_WRITES 1171 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1168 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1172 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1169 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1173 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1170 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1174 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1171 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1175 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1172 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1176 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1173 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1177 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1174 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1178 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1175 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1179 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1176 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1180 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1177 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1181 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1178 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1182 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1179 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1183 /*! 
reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1180 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1184 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1181 +#define WT_STAT_CONN_REC_PAGES 1185 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1182 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1186 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1183 +#define WT_STAT_CONN_REC_PAGE_DELETE 1187 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1184 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1188 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1185 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1189 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1186 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1190 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1187 +#define WT_STAT_CONN_SESSION_OPEN 1191 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1188 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1192 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1189 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1193 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1190 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1194 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1191 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1195 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1192 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1196 /*! 
session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1193 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1197 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1194 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1198 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1195 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1199 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1196 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1200 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1197 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1201 /*! session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1198 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1202 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1199 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1203 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1200 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1204 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1201 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1205 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1202 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1206 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1203 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1207 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1204 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1208 /*! 
session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1205 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1209 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1206 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1210 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1207 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1211 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1208 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1212 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1209 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1213 /*! thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1210 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1214 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1211 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1215 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1212 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1216 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1213 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1217 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1214 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1218 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1215 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1219 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1216 +#define WT_STAT_CONN_PAGE_SLEEP 1220 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1217 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1221 /*! 
transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1218 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1222 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1219 +#define WT_STAT_CONN_TXN_BEGIN 1223 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1220 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1224 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1221 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1225 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1222 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1226 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1223 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1227 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1224 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1228 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1225 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1229 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1226 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1230 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1227 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1231 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1228 +#define WT_STAT_CONN_TXN_CHECKPOINT 1232 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1229 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1233 /*! 
transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1230 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1234 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1231 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1235 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1232 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1236 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1233 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1237 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1234 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1238 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1235 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1239 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1236 +#define WT_STAT_CONN_TXN_SYNC 1240 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1237 +#define WT_STAT_CONN_TXN_COMMIT 1241 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1238 +#define WT_STAT_CONN_TXN_ROLLBACK 1242 /*! 
* @} diff --git a/src/support/stat.c b/src/support/stat.c index 66710473ab9..167d17137ce 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -677,7 +677,11 @@ static const char * const __stats_connection_desc[] = { "cache: eviction server unable to reach eviction goal", "cache: eviction state", "cache: eviction walks abandoned", + "cache: eviction worker thread active", + "cache: eviction worker thread created", "cache: eviction worker thread evicting pages", + "cache: eviction worker thread removed", + "cache: eviction worker thread stable number", "cache: failed eviction of pages that exceeded the in-memory maximum", "cache: files with active eviction walks", "cache: files with new eviction walks started", @@ -958,7 +962,11 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_slow = 0; /* not clearing cache_eviction_state */ stats->cache_eviction_walks_abandoned = 0; + /* not clearing cache_eviction_active_workers */ + stats->cache_eviction_worker_created = 0; stats->cache_eviction_worker_evicting = 0; + stats->cache_eviction_worker_removed = 0; + /* not clearing cache_eviction_stable_state_workers */ stats->cache_eviction_force_fail = 0; /* not clearing cache_eviction_walks_active */ stats->cache_eviction_walks_started = 0; @@ -1232,8 +1240,16 @@ __wt_stat_connection_aggregate( to->cache_eviction_state += WT_STAT_READ(from, cache_eviction_state); to->cache_eviction_walks_abandoned += WT_STAT_READ(from, cache_eviction_walks_abandoned); + to->cache_eviction_active_workers += + WT_STAT_READ(from, cache_eviction_active_workers); + to->cache_eviction_worker_created += + WT_STAT_READ(from, cache_eviction_worker_created); to->cache_eviction_worker_evicting += WT_STAT_READ(from, cache_eviction_worker_evicting); + to->cache_eviction_worker_removed += + WT_STAT_READ(from, cache_eviction_worker_removed); + to->cache_eviction_stable_state_workers += + WT_STAT_READ(from, cache_eviction_stable_state_workers); 
to->cache_eviction_force_fail += WT_STAT_READ(from, cache_eviction_force_fail); to->cache_eviction_walks_active += diff --git a/src/support/thread_group.c b/src/support/thread_group.c index a89468c367a..d04f8977a9a 100644 --- a/src/support/thread_group.c +++ b/src/support/thread_group.c @@ -71,12 +71,12 @@ __thread_group_grow( /* * __thread_group_shrink -- - * Decrease the number of running threads in the group, and free any + * Decrease the number of running threads in the group. Optionally free any * memory associated with slots larger than the new count. */ static int __thread_group_shrink(WT_SESSION_IMPL *session, - WT_THREAD_GROUP *group, uint32_t new_count) + WT_THREAD_GROUP *group, uint32_t new_count, bool free_thread) { WT_DECL_RET; WT_SESSION *wt_session; @@ -105,14 +105,15 @@ __thread_group_shrink(WT_SESSION_IMPL *session, WT_TRET(__wt_thread_join(session, thread->tid)); thread->tid = 0; } - - if (thread->session != NULL) { - wt_session = (WT_SESSION *)thread->session; - WT_TRET(wt_session->close(wt_session, NULL)); - thread->session = NULL; + if (free_thread) { + if (thread->session != NULL) { + wt_session = (WT_SESSION *)thread->session; + WT_TRET(wt_session->close(wt_session, NULL)); + thread->session = NULL; + } + __wt_free(session, thread); + group->threads[current_slot] = NULL; } - __wt_free(session, thread); - group->threads[current_slot] = NULL; } /* Update the thread group state to match our changes */ @@ -145,11 +146,14 @@ __thread_group_resize( if (new_min == group->min && new_max == group->max) return (0); + if (new_min > new_max) + return (EINVAL); + /* - * Coll shrink to reduce the number of thread structures and running + * Call shrink to reduce the number of thread structures and running * threads if required by the change in group size. 
*/ - WT_RET(__thread_group_shrink(session, group, new_max)); + WT_RET(__thread_group_shrink(session, group, new_max, true)); /* * Only reallocate the thread array if it is the largest ever, since @@ -289,7 +293,7 @@ __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) WT_ASSERT(session, __wt_rwlock_islocked(session, &group->lock)); /* Shut down all threads and free associated resources. */ - WT_TRET(__thread_group_shrink(session, group, 0)); + WT_TRET(__thread_group_shrink(session, group, 0, true)); __wt_free(session, group->threads); @@ -332,3 +336,30 @@ __wt_thread_group_start_one( return (ret); } + +/* + * __wt_thread_group_stop_one -- + * Stop one thread if possible. + */ +int +__wt_thread_group_stop_one( + WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool wait) +{ + WT_DECL_RET; + + if (group->current_threads <= group->min) + return (0); + + if (wait) + __wt_writelock(session, &group->lock); + else if (__wt_try_writelock(session, &group->lock) != 0) + return (0); + + /* Recheck the bounds now that we hold the lock */ + if (group->current_threads > group->min) + WT_TRET(__thread_group_shrink( + session, group, group->current_threads - 1, false)); + __wt_writeunlock(session, &group->lock); + + return (ret); +} diff --git a/tools/wtstats/stat_data.py b/tools/wtstats/stat_data.py index 5d385cda705..a94ce524ae3 100644 --- a/tools/wtstats/stat_data.py +++ b/tools/wtstats/stat_data.py @@ -128,6 +128,8 @@ no_clear_list = [ 'cache: eviction currently operating in aggressive mode', 'cache: eviction empty score', 'cache: eviction state', + 'cache: eviction worker thread active', + 'cache: eviction worker thread stable number', 'cache: files with active eviction walks', 'cache: maximum bytes configured', 'cache: maximum page size at eviction', -- cgit v1.2.1 From 4c461ebc2009d73a9b6e2ef0ee57bdfeba270064 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Thu, 29 Dec 2016 14:56:21 -0500 Subject: WT-3104 Fix single threaded eviction configurations. 
(#3221) --- bench/wtperf/runners/evict-btree-1.wtperf | 2 +- bench/wtperf/runners/evict-lsm-1.wtperf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bench/wtperf/runners/evict-btree-1.wtperf b/bench/wtperf/runners/evict-btree-1.wtperf index 24da4dd7902..741101d083f 100644 --- a/bench/wtperf/runners/evict-btree-1.wtperf +++ b/bench/wtperf/runners/evict-btree-1.wtperf @@ -1,5 +1,5 @@ # wtperf options file: evict btree configuration -conn_config="cache_size=50M" +conn_config="cache_size=50M,eviction=(threads_max=1)" table_config="type=file" icount=10000000 report_interval=5 diff --git a/bench/wtperf/runners/evict-lsm-1.wtperf b/bench/wtperf/runners/evict-lsm-1.wtperf index ad885d98eb7..641a85dc889 100644 --- a/bench/wtperf/runners/evict-lsm-1.wtperf +++ b/bench/wtperf/runners/evict-lsm-1.wtperf @@ -1,5 +1,5 @@ # wtperf options file: evict lsm configuration -conn_config="cache_size=50M,lsm_manager=(worker_thread_max=6)" +conn_config="cache_size=50M,eviction=(threads_max=1),lsm_manager=(worker_thread_max=6)" table_config="type=lsm,lsm=(chunk_size=2M),os_cache_dirty_max=16MB" compact=true icount=10000000 -- cgit v1.2.1 From 5af64580f5be08d2f8900b96a83d29a3ae2cf04a Mon Sep 17 00:00:00 2001 From: sueloverso Date: Wed, 4 Jan 2017 00:55:11 -0500 Subject: SERVER-16796 Recovery progress via verbose messages. 
(#3225) --- dist/api_data.py | 1 + dist/flags.py | 1 + src/config/config_def.c | 35 ++++++++++++++++++++--------------- src/conn/conn_api.c | 1 + src/include/flags.h | 19 ++++++++++--------- src/include/wiredtiger.in | 12 ++++++------ src/log/log.c | 9 +++++++++ src/txn/txn_recover.c | 2 +- 8 files changed, 49 insertions(+), 31 deletions(-) diff --git a/dist/api_data.py b/dist/api_data.py index 04071a84332..324d1e4f281 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -537,6 +537,7 @@ connection_runtime_config = [ 'rebalance', 'reconcile', 'recovery', + 'recovery_progress', 'salvage', 'shared_cache', 'split', diff --git a/dist/flags.py b/dist/flags.py index 320bd8f6fb9..70e18712839 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -76,6 +76,7 @@ flags = { 'VERB_REBALANCE', 'VERB_RECONCILE', 'VERB_RECOVERY', + 'VERB_RECOVERY_PROGRESS', 'VERB_SALVAGE', 'VERB_SHARED_CACHE', 'VERB_SPLIT', diff --git a/src/config/config_def.c b/src/config/config_def.c index 83c1436eade..6a93c1d05e2 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -149,9 +149,10 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," - "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," - "\"shared_cache\",\"split\",\"temporary\",\"thread_group\"," - "\"transaction\",\"verify\",\"version\",\"write\"]", + "\"read\",\"rebalance\",\"reconcile\",\"recovery\"," + "\"recovery_progress\",\"salvage\",\"shared_cache\",\"split\"," + "\"temporary\",\"thread_group\",\"transaction\",\"verify\"," + "\"version\",\"write\"]", NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -751,9 +752,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," 
"\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," - "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," - "\"shared_cache\",\"split\",\"temporary\",\"thread_group\"," - "\"transaction\",\"verify\",\"version\",\"write\"]", + "\"read\",\"rebalance\",\"reconcile\",\"recovery\"," + "\"recovery_progress\",\"salvage\",\"shared_cache\",\"split\"," + "\"temporary\",\"thread_group\",\"transaction\",\"verify\"," + "\"version\",\"write\"]", NULL, 0 }, { "write_through", "list", NULL, "choices=[\"data\",\"log\"]", @@ -837,9 +839,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," - "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," - "\"shared_cache\",\"split\",\"temporary\",\"thread_group\"," - "\"transaction\",\"verify\",\"version\",\"write\"]", + "\"read\",\"rebalance\",\"reconcile\",\"recovery\"," + "\"recovery_progress\",\"salvage\",\"shared_cache\",\"split\"," + "\"temporary\",\"thread_group\",\"transaction\",\"verify\"," + "\"version\",\"write\"]", NULL, 0 }, { "version", "string", NULL, NULL, NULL, 0 }, { "write_through", "list", @@ -918,9 +921,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," - "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," - "\"shared_cache\",\"split\",\"temporary\",\"thread_group\"," - "\"transaction\",\"verify\",\"version\",\"write\"]", + "\"read\",\"rebalance\",\"reconcile\",\"recovery\"," + "\"recovery_progress\",\"salvage\",\"shared_cache\",\"split\"," + "\"temporary\",\"thread_group\",\"transaction\",\"verify\"," + "\"version\",\"write\"]", NULL, 0 }, { "version", "string", 
NULL, NULL, NULL, 0 }, { "write_through", "list", @@ -999,9 +1003,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," - "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," - "\"shared_cache\",\"split\",\"temporary\",\"thread_group\"," - "\"transaction\",\"verify\",\"version\",\"write\"]", + "\"read\",\"rebalance\",\"reconcile\",\"recovery\"," + "\"recovery_progress\",\"salvage\",\"shared_cache\",\"split\"," + "\"temporary\",\"thread_group\",\"transaction\",\"verify\"," + "\"version\",\"write\"]", NULL, 0 }, { "write_through", "list", NULL, "choices=[\"data\",\"log\"]", diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 474b8bbad8a..50617240d38 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1811,6 +1811,7 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) { "rebalance", WT_VERB_REBALANCE }, { "reconcile", WT_VERB_RECONCILE }, { "recovery", WT_VERB_RECOVERY }, + { "recovery_progress", WT_VERB_RECOVERY_PROGRESS }, { "salvage", WT_VERB_SALVAGE }, { "shared_cache", WT_VERB_SHARED_CACHE }, { "split", WT_VERB_SPLIT }, diff --git a/src/include/flags.h b/src/include/flags.h index e7a5ba066df..2f0c207078a 100644 --- a/src/include/flags.h +++ b/src/include/flags.h @@ -102,15 +102,16 @@ #define WT_VERB_REBALANCE 0x00008000 #define WT_VERB_RECONCILE 0x00010000 #define WT_VERB_RECOVERY 0x00020000 -#define WT_VERB_SALVAGE 0x00040000 -#define WT_VERB_SHARED_CACHE 0x00080000 -#define WT_VERB_SPLIT 0x00100000 -#define WT_VERB_TEMPORARY 0x00200000 -#define WT_VERB_THREAD_GROUP 0x00400000 -#define WT_VERB_TRANSACTION 0x00800000 -#define WT_VERB_VERIFY 0x01000000 -#define WT_VERB_VERSION 0x02000000 -#define WT_VERB_WRITE 0x04000000 +#define WT_VERB_RECOVERY_PROGRESS 0x00040000 +#define WT_VERB_SALVAGE 0x00080000 +#define 
WT_VERB_SHARED_CACHE 0x00100000 +#define WT_VERB_SPLIT 0x00200000 +#define WT_VERB_TEMPORARY 0x00400000 +#define WT_VERB_THREAD_GROUP 0x00800000 +#define WT_VERB_TRANSACTION 0x01000000 +#define WT_VERB_VERIFY 0x02000000 +#define WT_VERB_VERSION 0x04000000 +#define WT_VERB_WRITE 0x08000000 #define WT_VISIBILITY_ERR 0x00000080 /* * flags section: END diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 7c27baa9395..90989cc679d 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1985,9 +1985,9 @@ struct __wt_connection { * "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c "lsm"\, * \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c * "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c - * "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c - * "thread_group"\, \c "transaction"\, \c "verify"\, \c "version"\, \c - * "write"; default empty.} + * "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, + * \c "temporary"\, \c "thread_group"\, \c "transaction"\, \c "verify"\, + * \c "version"\, \c "write"; default empty.} * @configend * @errors */ @@ -2516,9 +2516,9 @@ struct __wt_connection { * "checkpoint"\, \c "compact"\, \c "evict"\, \c "evictserver"\, \c "fileops"\, * \c "handleops"\, \c "log"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c * "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c - * "recovery"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, - * \c "thread_group"\, \c "transaction"\, \c "verify"\, \c "version"\, \c - * "write"; default empty.} + * "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c + * "split"\, \c "temporary"\, \c "thread_group"\, \c "transaction"\, \c + * "verify"\, \c "version"\, \c "write"; default empty.} * @config{write_through, Use \c FILE_FLAG_WRITE_THROUGH on Windows to write to * files. Ignored on non-Windows systems. 
Options are given as a list\, such * as "write_through=[data]". Configuring \c write_through requires diff --git a/src/log/log.c b/src/log/log.c index fb3935abf81..74c5442d405 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -1674,6 +1674,10 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, &log_fh, WT_LOG_FILENAME, start_lsn.l.file, WT_LOG_OPEN_VERIFY)); WT_ERR(__wt_filesize(session, log_fh, &log_size)); rd_lsn = start_lsn; + if (LF_ISSET(WT_LOGSCAN_RECOVER)) + __wt_verbose(session, WT_VERB_RECOVERY_PROGRESS, + "Recovering log %" PRIu32 " through %" PRIu32, + rd_lsn.l.file, end_lsn.l.file); WT_ERR(__wt_scr_alloc(session, WT_LOG_ALIGN, &buf)); WT_ERR(__wt_scr_alloc(session, 0, &decryptitem)); @@ -1722,6 +1726,11 @@ advance: WT_ERR(__log_openfile(session, &log_fh, WT_LOG_FILENAME, rd_lsn.l.file, WT_LOG_OPEN_VERIFY)); + if (LF_ISSET(WT_LOGSCAN_RECOVER)) + __wt_verbose(session, WT_VERB_RECOVERY_PROGRESS, + "Recovering log %" PRIu32 + " through %" PRIu32, + rd_lsn.l.file, end_lsn.l.file); WT_ERR(__wt_filesize(session, log_fh, &log_size)); eol = false; continue; diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c index a6390dcbd06..2d8a77a69e6 100644 --- a/src/txn/txn_recover.c +++ b/src/txn/txn_recover.c @@ -501,7 +501,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session) * Pass WT_LOGSCAN_RECOVER so that old logs get truncated. 
*/ r.metadata_only = false; - __wt_verbose(session, WT_VERB_RECOVERY, + __wt_verbose(session, WT_VERB_RECOVERY | WT_VERB_RECOVERY_PROGRESS, "Main recovery loop: starting at %" PRIu32 "/%" PRIu32, r.ckpt_lsn.l.file, r.ckpt_lsn.l.offset); WT_ERR(__wt_log_needs_recovery(session, &r.ckpt_lsn, &needs_rec)); -- cgit v1.2.1 From 49e48315235a189ed769c43e35e6a73b9a074fa2 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 4 Jan 2017 00:57:52 -0500 Subject: WT-3100 test bug: format is weighted to delete, insert, then write operations (#3219) test/format was weighted to delete, insert, then write operations, which meant that configuring insert=80 might have no effect, if the randomly assigned percentage of delete operations was 95. Rewrite the code that calculates operation percentages to assign operation percentages in a random order, add an explicit read percentage instead of making all non-allocated operations default to reads. --- test/format/config.c | 104 +++++++++++++++++++++++++++++++++++++++++---------- test/format/config.h | 18 +++++---- test/format/format.h | 1 + 3 files changed, 97 insertions(+), 26 deletions(-) diff --git a/test/format/config.c b/test/format/config.c index cf922b5db04..43447c9ba02 100644 --- a/test/format/config.c +++ b/test/format/config.c @@ -44,6 +44,7 @@ static void config_map_compression(const char *, u_int *); static void config_map_encryption(const char *, u_int *); static void config_map_file_type(const char *, u_int *); static void config_map_isolation(const char *, u_int *); +static void config_pct(void); static void config_reset(void); /* @@ -159,31 +160,19 @@ config_setup(void) config_encryption(); config_isolation(); config_lrt(); + config_pct(); /* - * Periodically, set the delete percentage to 0 so salvage gets run, - * as long as the delete percentage isn't nailed down. - * Don't do it on the first run, all our smoke tests would hit it. 
- */ - if (!g.replay && g.run_cnt % 10 == 9 && !config_is_perm("delete_pct")) - config_single("delete_pct=0", 0); - - /* - * If this is an LSM run, set the cache size and crank up the insert - * percentage. + * If this is an LSM run, ensure cache size sanity. + * Ensure there is at least 1MB of cache per thread. */ - if (DATASOURCE("lsm")) { - if (!config_is_perm("cache")) + if (!config_is_perm("cache")) { + if (DATASOURCE("lsm")) g.c_cache = 30 * g.c_chunk_size; - - if (!config_is_perm("insert_pct")) - g.c_insert_pct = mmrand(NULL, 50, 85); + if (g.c_cache < g.c_threads) + g.c_cache = g.c_threads; } - /* Ensure there is at least 1MB of cache per thread. */ - if (!config_is_perm("cache") && g.c_cache < g.c_threads) - g.c_cache = g.c_threads; - /* Give in-memory configuration a final review. */ config_in_memory_check(); @@ -481,6 +470,83 @@ config_lrt(void) } } +/* + * config_pct -- + * Configure operation percentages. + */ +static void +config_pct(void) +{ + static struct { + const char *name; /* Operation */ + uint32_t *vp; /* Value store */ + u_int order; /* Order of assignment */ + } list[] = { +#define CONFIG_DELETE_ENTRY 0 + { "delete_pct", &g.c_delete_pct, 0 }, + { "insert_pct", &g.c_insert_pct, 0 }, + { "read_pct", &g.c_read_pct, 0 }, + { "write_pct", &g.c_write_pct, 0 }, + }; + u_int i, max_order, max_slot, n, pct; + + /* + * Walk the list of operations, checking for an illegal configuration + * and creating a random order in the list. + */ + pct = 0; + for (i = 0; i < WT_ELEMENTS(list); ++i) + if (config_is_perm(list[i].name)) + pct += *list[i].vp; + else + list[i].order = mmrand(NULL, 0, 1000); + if (pct > 100) + testutil_die(EINVAL, + "operation percentages total to more than 100%%"); + + /* + * If the delete percentage isn't nailed down, periodically set it to + * 0 so salvage gets run. Don't do it on the first run, all our smoke + * tests would hit it. 
+ */ + if (!config_is_perm("delete_pct") && !g.replay && g.run_cnt % 10 == 9) { + list[CONFIG_DELETE_ENTRY].order = 0; + *list[CONFIG_DELETE_ENTRY].vp = 0; + } + + /* + * Walk the list, allocating random numbers of operations in a random + * order. + * + * If the "order" field is non-zero, we need to create a value for this + * operation. Find the largest order field in the array; if one non-zero + * order field is found, it's the last entry and gets the remainder of + * the operations. + */ + for (pct = 100 - pct;;) { + for (i = n = + max_order = max_slot = 0; i < WT_ELEMENTS(list); ++i) { + if (list[i].order != 0) + ++n; + if (list[i].order > max_order) { + max_order = list[i].order; + max_slot = i; + } + } + if (n == 0) + break; + if (n == 1) { + *list[max_slot].vp = pct; + break; + } + *list[max_slot].vp = mmrand(NULL, 0, pct); + list[max_slot].order = 0; + pct -= *list[max_slot].vp; + } + testutil_assert(g.c_delete_pct + + g.c_insert_pct + g.c_read_pct + g.c_write_pct == 100); +} + /* * config_error -- * Display configuration information on error. 
diff --git a/test/format/config.h b/test/format/config.h index e4f7af2e1b2..e3e1e73a786 100644 --- a/test/format/config.h +++ b/test/format/config.h @@ -131,7 +131,7 @@ static CONFIG c[] = { { "delete_pct", "percent operations that are deletes", - 0x0, 0, 45, 90, &g.c_delete_pct, NULL }, + C_IGNORE, 0, 0, 100, &g.c_delete_pct, NULL }, { "dictionary", "if values are dictionary compressed", /* 20% */ @@ -171,7 +171,7 @@ static CONFIG c[] = { { "insert_pct", "percent operations that are inserts", - 0x0, 0, 45, 90, &g.c_insert_pct, NULL }, + C_IGNORE, 0, 0, 100, &g.c_insert_pct, NULL }, { "internal_key_truncation", "if internal keys are truncated", /* 95% */ @@ -254,6 +254,14 @@ static CONFIG c[] = { "quiet run (same as -q)", C_IGNORE|C_BOOL, 0, 0, 0, &g.c_quiet, NULL }, + { "read_pct", + "percent operations that are reads", + C_IGNORE, 0, 0, 100, &g.c_read_pct, NULL }, + + { "rebalance", + "rebalance testing", /* 100% */ + C_BOOL, 100, 1, 0, &g.c_rebalance, NULL }, + { "repeat_data_pct", "percent duplicate values in row- or var-length column-stores", 0x0, 0, 90, 90, &g.c_repeat_data_pct, NULL }, @@ -270,10 +278,6 @@ static CONFIG c[] = { "the number of runs", C_IGNORE, 0, UINT_MAX, UINT_MAX, &g.c_runs, NULL }, - { "rebalance", - "rebalance testing", /* 100% */ - C_BOOL, 100, 1, 0, &g.c_rebalance, NULL }, - { "salvage", "salvage testing", /* 100% */ C_BOOL, 100, 1, 0, &g.c_salvage, NULL }, @@ -320,7 +324,7 @@ static CONFIG c[] = { { "write_pct", "percent operations that are writes", - 0x0, 0, 90, 90, &g.c_write_pct, NULL }, + C_IGNORE, 0, 0, 100, &g.c_write_pct, NULL }, { NULL, NULL, 0x0, 0, 0, 0, NULL, NULL } }; diff --git a/test/format/format.h b/test/format/format.h index c1f4875dbb2..6bb44410acc 100644 --- a/test/format/format.h +++ b/test/format/format.h @@ -192,6 +192,7 @@ typedef struct { uint32_t c_reverse; uint32_t c_rows; uint32_t c_runs; + uint32_t c_read_pct; uint32_t c_rebalance; uint32_t c_salvage; uint32_t c_split_pct; -- cgit v1.2.1 From 
b47f127c8d935e2a9815970eb1309d6e4b417549 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 4 Jan 2017 01:13:25 -0500 Subject: WT-3099 lint: static function declarations, non-text characters in documentation (#3218) * Remove characters outside the ISO/IEC 8859-1 character set in documentation. Add --encoding=iso-8859-1 to the aspell check line to avoid in the future. * __dump_txn_state and __dump_cache were prototyped static but not declared static. * Clang sanitizer complaint: ret set but never read because the error label sets ret explicitly. --- dist/s_docs | 3 ++- src/docs/spell.ok | 3 ++- src/docs/tune-page-size-and-comp.dox | 8 ++++---- src/evict/evict_lru.c | 10 ++++------ src/utilities/util_dump.c | 16 ++++++++-------- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/dist/s_docs b/dist/s_docs index f4332257193..6ebffb947ec 100755 --- a/dist/s_docs +++ b/dist/s_docs @@ -96,7 +96,8 @@ spellchk() type aspell > /dev/null 2>&1 || return (cd ../src/docs && - cat *.dox | aspell --lang=en --personal=./spell.ok list) | + cat *.dox | + aspell --encoding=iso-8859-1 --lang=en --personal=./spell.ok list) | sort -u > $t test -s $t && { echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=" diff --git a/src/docs/spell.ok b/src/docs/spell.ok index f87f24cef5c..bc2e16b1122 100644 --- a/src/docs/spell.ok +++ b/src/docs/spell.ok @@ -50,8 +50,8 @@ LDFLAGS LIBS LLVM LOGREC -LRVv LRU +LRVv LSB LSM LZ @@ -168,6 +168,7 @@ dNLen dNOff dT dataN +database's dataitem dataset datasets diff --git a/src/docs/tune-page-size-and-comp.dox b/src/docs/tune-page-size-and-comp.dox index 70e9875bcc4..96b0fda2333 100644 --- a/src/docs/tune-page-size-and-comp.dox +++ b/src/docs/tune-page-size-and-comp.dox @@ -40,7 +40,7 @@ of these blocks is defined by a parameter called allocation_size, which is the underlying unit of allocation for the file the data gets stored in. 
An application might choose to have data compressed before it gets stored to disk by enabling block compression. - - A database’s tables are usually much larger than the main memory available. + - A database's tables are usually much larger than the main memory available. Not all of the data can be kept in memory at any given time. A process called eviction takes care of making space for new data by freeing the memory of data infrequently accessed. An eviction server regularly finds in-memory pages that @@ -52,7 +52,7 @@ associated key is used to refer to an in-memory page. In the case of this page not being in memory, appropriate on-disk page(s) are read and an in-memory page constructed (the opposite of reconciliation). A data structure is maintained on every in-memory page to store any insertions or modifications to the data done -on that page. As more and more data gets written to this page, the page’s memory +on that page. As more and more data gets written to this page, the page's memory footprint keeps growing. - An application can choose to set the maximum size a page is allowed to grow in-memory. A default size is set by WiredTiger if the application doesn't @@ -81,7 +81,7 @@ There are additional configuration settings that tune more esoteric and specialized data. Those are included for completeness but are rarely changed. @subsection memory_page_max memory_page_max -The maximum size a table’s page is allowed to grow to in memory before being +The maximum size a table's page is allowed to grow to in memory before being reconciled to disk. - An integer, with acceptable values between 512B and 10TB - Default size: 5 MB @@ -98,7 +98,7 @@ both require exclusive access to the page which makes an application's write operations wait. Having a large memory_page_max means that the pages will need to be split and reconciled less often. 
But when that happens, the duration that an exclusive access to the page is required is longer, increasing the latency of -an application’s insert or update operations. Conversely, having a smaller +an application's insert or update operations. Conversely, having a smaller memory_page_max reduces the time taken for splitting and reconciling the pages, but causes it to happen more frequently, forcing more frequent but shorter exclusive accesses to the pages. diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 485fd0e6d40..a03c1f16dec 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1619,7 +1619,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, if (page->read_gen == WT_READGEN_NOTSET) __wt_cache_read_gen_new(session, page); - /* Pages we no longer need (clean or dirty), are found money. */ + /* Pages being forcibly evicted go on the urgent queue. */ if (page->read_gen == WT_READGEN_OLDEST || page->memory_footprint >= btree->splitmempage) { WT_STAT_CONN_INCR( @@ -1629,7 +1629,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, continue; } - /* Pages that are empty or from dead trees are also good. */ + /* Pages that are empty or from dead trees are fast-tracked. */ if (__wt_page_is_empty(page) || F_ISSET(session->dhandle, WT_DHANDLE_DEAD)) goto fast; @@ -2154,13 +2154,11 @@ __wt_evict_priority_clear(WT_SESSION_IMPL *session) } #ifdef HAVE_DIAGNOSTIC -static int __dump_txn_state(WT_SESSION_IMPL *, FILE *fp); -static int __dump_cache(WT_SESSION_IMPL *, FILE *fp); /* * __dump_txn_state -- * Output debugging information about the global transaction state. */ -int +static int __dump_txn_state(WT_SESSION_IMPL *session, FILE *fp) { WT_CONNECTION_IMPL *conn; @@ -2259,7 +2257,7 @@ __dump_txn_state(WT_SESSION_IMPL *session, FILE *fp) * __dump_cache -- * Output debugging information about the size of the files in cache. 
*/ -int +static int __dump_cache(WT_SESSION_IMPL *session, FILE *fp) { WT_CONNECTION_IMPL *conn; diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c index 95cd39322c4..3f8b4a49dfe 100644 --- a/src/utilities/util_dump.c +++ b/src/utilities/util_dump.c @@ -81,13 +81,13 @@ util_dump(WT_SESSION *session, int argc, char *argv[]) return (usage()); if (json && - ((ret = dump_json_begin(session)) != 0 || - (ret = dump_prefix(session, hex, json)) != 0)) + (dump_json_begin(session) != 0 || + dump_prefix(session, hex, json) != 0)) goto err; for (i = 0; i < argc; i++) { if (json && i > 0) - if ((ret = dump_json_separator(session)) != 0) + if (dump_json_separator(session) != 0) goto err; free(name); free(simplename); @@ -120,7 +120,7 @@ util_dump(WT_SESSION *session, int argc, char *argv[]) } if ((simplename = strdup(name)) == NULL) { - ret = util_err(session, errno, NULL); + (void)util_err(session, errno, NULL); goto err; } if ((p = strchr(simplename, '(')) != NULL) @@ -128,19 +128,19 @@ util_dump(WT_SESSION *session, int argc, char *argv[]) if (dump_config(session, simplename, cursor, hex, json) != 0) goto err; - if ((ret = dump_record(cursor, reverse, json)) != 0) + if (dump_record(cursor, reverse, json) != 0) goto err; - if (json && (ret = dump_json_table_end(session)) != 0) + if (json && dump_json_table_end(session) != 0) goto err; ret = cursor->close(cursor); cursor = NULL; if (ret != 0) { - ret = util_err(session, ret, NULL); + (void)util_err(session, ret, NULL); goto err; } } - if (json && ((ret = dump_json_end(session)) != 0)) + if (json && dump_json_end(session) != 0) goto err; if (0) { -- cgit v1.2.1 From 3a211a245a3b9198fdc0618bd0e2d3d97ff8171c Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 4 Jan 2017 09:53:58 -0500 Subject: WT-3100 test bug: format is weighted to delete, insert, then write operations. Bug fix. 
--- test/format/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/format/config.c b/test/format/config.c index 43447c9ba02..50430fe073e 100644 --- a/test/format/config.c +++ b/test/format/config.c @@ -499,7 +499,7 @@ config_pct(void) if (config_is_perm(list[i].name)) pct += *list[i].vp; else - list[i].order = mmrand(NULL, 0, 1000); + list[i].order = mmrand(NULL, 1, 1000); if (pct > 100) testutil_die(EINVAL, "operation percentages total to more than 100%%"); -- cgit v1.2.1 From 0f8cb7b38a85e1afe6c91d49222b8baab4525ad0 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 4 Jan 2017 11:03:38 -0500 Subject: WT-2898 Improve performance of eviction-heavy workloads by dynamically controlling the number of eviction threads lint fix --- src/evict/evict_lru.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index a03c1f16dec..08cafcf32ed 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -559,7 +559,7 @@ __evict_pass(WT_SESSION_IMPL *session) prev = now; if (conn->evict_threads.threads[0]->session == session) - __evict_tune_workers(session); + WT_RET(__evict_tune_workers(session)); /* * Increment the shared read generation. Do this occasionally * even if eviction is not currently required, so that pages -- cgit v1.2.1 From 4cde2ec263744da56807a84309d0f910c6ab2636 Mon Sep 17 00:00:00 2001 From: Eric Milkie Date: Wed, 4 Jan 2017 15:24:13 -0500 Subject: WT-3109 correct named snapshots documentation (#3231) --- src/docs/transactions.dox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/docs/transactions.dox b/src/docs/transactions.dox index bbbd2d52296..3b438eda366 100644 --- a/src/docs/transactions.dox +++ b/src/docs/transactions.dox @@ -141,7 +141,7 @@ as if the transaction started at the time of the WT_SESSION::snapshot call that created the snapshot. 
Named snapshots keep data pinned in cache as if a real transaction were -running for the time that the named transaction is active. The resources +running for the time that the named snapshot is active. The resources associated with named snapshots should be released by calling WT_SESSION::snapshot with a configuration that includes "drop=". See WT_SESSION::snapshot documentation for details of -- cgit v1.2.1 From 8255cfa17271e33cd1de1c240c49e9ec511aa4c6 Mon Sep 17 00:00:00 2001 From: Sulabh Mahajan Date: Sat, 7 Jan 2017 03:54:40 +1100 Subject: WT-3108 Add disk memory size to verify debug output (#3226) * WT-3108 Add disk memory size to verify debug output * Check for page->dsk to be not NULL before accessing disk mem size --- src/btree/bt_debug.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index 957ccdbea1a..b62125e069d 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -679,8 +679,11 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref) } WT_RET(ds->f(ds, ": %s\n", __wt_page_type_string(page->type))); - WT_RET(ds->f(ds, - "\t" "disk %p, entries %" PRIu32, (void *)page->dsk, entries)); + WT_RET(ds->f(ds, "\t" "disk %p", (void *)page->dsk)); + if (page->dsk != NULL) + WT_RET(ds->f( + ds, ", dsk_mem_size %" PRIu32, page->dsk->mem_size)); + WT_RET(ds->f(ds, ", entries %" PRIu32, entries)); WT_RET(ds->f(ds, ", %s", __wt_page_is_modified(page) ? "dirty" : "clean")); WT_RET(ds->f(ds, ", %s", __wt_rwlock_islocked( -- cgit v1.2.1 From 9dabbaf2da6ecdd337436f815bb6468802b9c07a Mon Sep 17 00:00:00 2001 From: sueloverso Date: Fri, 6 Jan 2017 15:40:54 -0500 Subject: WT-3112 Add lock statistics to try_lock path. 
(#3233) --- src/evict/evict_lru.c | 4 ++-- src/include/mutex.i | 30 ++++++++++++++++++++++++++++++ src/include/schema.h | 2 +- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 08cafcf32ed..2cedb1d49c3 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -326,7 +326,7 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work) * otherwise we can block applications evicting large pages. */ if (!__wt_cache_stuck(session)) { - for (spins = 0; (ret = __wt_spin_trylock( + for (spins = 0; (ret = __wt_spin_trylock_track( session, &conn->dhandle_lock)) == EBUSY && cache->pass_intr == 0; spins++) { if (spins < WT_THOUSAND) @@ -1264,7 +1264,7 @@ retry: while (slot < max_entries) { * reference count to keep it alive while we sweep. */ if (!dhandle_locked) { - for (spins = 0; (ret = __wt_spin_trylock( + for (spins = 0; (ret = __wt_spin_trylock_track( session, &conn->dhandle_lock)) == EBUSY && cache->pass_intr == 0; spins++) { diff --git a/src/include/mutex.i b/src/include/mutex.i index a6309e0976b..a9abef5be70 100644 --- a/src/include/mutex.i +++ b/src/include/mutex.i @@ -300,3 +300,33 @@ __wt_spin_lock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t) } else __wt_spin_lock(session, t); } + +/* + * __wt_spin_trylock_track -- + * Try to lock a spinlock or fail immediately if it is busy. + * Track if successful. 
+ */ +static inline int +__wt_spin_trylock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t) +{ + struct timespec enter, leave; + WT_DECL_RET; + int64_t **stats; + + if (t->stat_count_off != -1 && WT_STAT_ENABLED(session)) { + __wt_epoch(session, &enter); + ret = __wt_spin_trylock(session, t); + __wt_epoch(session, &leave); + WT_RET(ret); + stats = (int64_t **)S2C(session)->stats; + stats[session->stat_bucket][t->stat_count_off]++; + if (F_ISSET(session, WT_SESSION_INTERNAL)) + stats[session->stat_bucket][t->stat_int_usecs_off] += + (int64_t)WT_TIMEDIFF_US(leave, enter); + else + stats[session->stat_bucket][t->stat_app_usecs_off] += + (int64_t)WT_TIMEDIFF_US(leave, enter); + } else + ret = __wt_spin_trylock(session, t); + return (ret); +} diff --git a/src/include/schema.h b/src/include/schema.h index a17affb7660..bb116e5cf2f 100644 --- a/src/include/schema.h +++ b/src/include/schema.h @@ -102,7 +102,7 @@ struct __wt_table { ret = 0; \ if (F_ISSET(session, (flag))) { \ op; \ - } else if ((ret = __wt_spin_trylock(session, lock)) == 0) { \ + } else if ((ret = __wt_spin_trylock_track(session, lock)) == 0) {\ F_SET(session, (flag)); \ op; \ F_CLR(session, (flag)); \ -- cgit v1.2.1 From 216903ac097f61ee787f08296b2f3be298f54087 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Mon, 9 Jan 2017 07:22:10 -0500 Subject: WT-3112 Time the eviction try-lock for the dhandle overall, not per-attempt (#3235) * WT-3112 Time the eviction try-lock for the dhandle overall, not per-attempt. * Fix comment * Some style preference and nits. * lint - my spell checker knows backoff * Minor refactoring. 
--- src/evict/evict_lru.c | 78 +++++++++++++++++++++++++++++++++++++-------------- src/include/mutex.i | 17 ++--------- 2 files changed, 60 insertions(+), 35 deletions(-) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 2cedb1d49c3..ba8851812cb 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -23,6 +23,59 @@ static int __evict_walk_file( #define WT_EVICT_HAS_WORKERS(s) \ (S2C(s)->evict_threads.current_threads > 1) +/* + * __evict_lock_dhandle -- + * Try to get the dhandle lock, with yield and sleep back off. + * Keep timing statistics overall. + */ +static int +__evict_lock_dhandle(WT_SESSION_IMPL *session) +{ + struct timespec enter, leave; + WT_CACHE *cache; + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_SPINLOCK *dh_lock; + int64_t **stats; + u_int spins; + bool dh_stats; + + conn = S2C(session); + cache = conn->cache; + dh_lock = &conn->dhandle_lock; + stats = (int64_t **)conn->stats; + dh_stats = WT_STAT_ENABLED(session) && dh_lock->stat_count_off != -1; + + /* + * Maintain lock acquisition timing statistics as if this were a + * regular lock acquisition. + */ + if (dh_stats) + __wt_epoch(session, &enter); + /* + * Use a custom lock acquisition back off loop so the eviction server + * notices any interrupt quickly. + */ + for (spins = 0; + (ret = __wt_spin_trylock_track(session, dh_lock)) == EBUSY && + cache->pass_intr == 0; spins++) { + if (spins < WT_THOUSAND) + __wt_yield(); + else + __wt_sleep(0, WT_THOUSAND); + } + /* + * Only record statistics on success. + */ + WT_RET(ret); + if (dh_stats) { + __wt_epoch(session, &leave); + stats[session->stat_bucket][dh_lock->stat_int_usecs_off] += + (int64_t)WT_TIMEDIFF_US(leave, enter); + } + return (0); +} + /* * __evict_entry_priority -- * Get the adjusted read generation for an eviction entry. 
@@ -307,7 +360,6 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work) struct timespec now; #endif uint64_t orig_pages_evicted; - u_int spins; conn = S2C(session); cache = conn->cache; @@ -326,21 +378,14 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work) * otherwise we can block applications evicting large pages. */ if (!__wt_cache_stuck(session)) { - for (spins = 0; (ret = __wt_spin_trylock_track( - session, &conn->dhandle_lock)) == EBUSY && - cache->pass_intr == 0; spins++) { - if (spins < WT_THOUSAND) - __wt_yield(); - else - __wt_sleep(0, WT_THOUSAND); - } + /* * If we gave up acquiring the lock, that indicates a * session is waiting for us to clear walks. Do that * as part of a normal pass (without the handle list * lock) to avoid deadlock. */ - if (ret == EBUSY) + if ((ret = __evict_lock_dhandle(session)) == EBUSY) return (0); WT_RET(ret); ret = __evict_clear_all_walks(session); @@ -1226,7 +1271,7 @@ __evict_walk(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue) WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle; WT_DECL_RET; - u_int max_entries, retries, slot, spins, start_slot, total_candidates; + u_int max_entries, retries, slot, start_slot, total_candidates; bool dhandle_locked, incr; conn = S2C(session); @@ -1264,16 +1309,7 @@ retry: while (slot < max_entries) { * reference count to keep it alive while we sweep. 
*/ if (!dhandle_locked) { - for (spins = 0; (ret = __wt_spin_trylock_track( - session, &conn->dhandle_lock)) == EBUSY && - cache->pass_intr == 0; - spins++) { - if (spins < WT_THOUSAND) - __wt_yield(); - else - __wt_sleep(0, WT_THOUSAND); - } - WT_ERR(ret); + WT_ERR(__evict_lock_dhandle(session)); dhandle_locked = true; } diff --git a/src/include/mutex.i b/src/include/mutex.i index a9abef5be70..6b83cb280d3 100644 --- a/src/include/mutex.i +++ b/src/include/mutex.i @@ -309,24 +309,13 @@ __wt_spin_lock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t) static inline int __wt_spin_trylock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t) { - struct timespec enter, leave; - WT_DECL_RET; int64_t **stats; if (t->stat_count_off != -1 && WT_STAT_ENABLED(session)) { - __wt_epoch(session, &enter); - ret = __wt_spin_trylock(session, t); - __wt_epoch(session, &leave); - WT_RET(ret); + WT_RET(__wt_spin_trylock(session, t)); stats = (int64_t **)S2C(session)->stats; stats[session->stat_bucket][t->stat_count_off]++; - if (F_ISSET(session, WT_SESSION_INTERNAL)) - stats[session->stat_bucket][t->stat_int_usecs_off] += - (int64_t)WT_TIMEDIFF_US(leave, enter); - else - stats[session->stat_bucket][t->stat_app_usecs_off] += - (int64_t)WT_TIMEDIFF_US(leave, enter); + return (0); } else - ret = __wt_spin_trylock(session, t); - return (ret); + return (__wt_spin_trylock(session, t)); } -- cgit v1.2.1 From 247b3a5f2c2b2d8ab53d151fa18a23143501c2b0 Mon Sep 17 00:00:00 2001 From: David Hows Date: Tue, 10 Jan 2017 11:13:46 +1100 Subject: WT-3106 Add truncate operation to wt command line utility (#3227) --- SConstruct | 1 + build_posix/Make.base | 1 + src/docs/command-line.dox | 13 ++++++ src/docs/upgrading.dox | 7 ++++ src/utilities/util.h | 1 + src/utilities/util_main.c | 5 +++ src/utilities/util_truncate.c | 51 ++++++++++++++++++++++++ test/suite/test_util14.py | 92 +++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 171 insertions(+) create mode 100644 src/utilities/util_truncate.c create 
mode 100644 test/suite/test_util14.py diff --git a/SConstruct b/SConstruct index df7a66238e8..e9e72630b11 100644 --- a/SConstruct +++ b/SConstruct @@ -313,6 +313,7 @@ wtbin = env.Program("wt", [ "src/utilities/util_rename.c", "src/utilities/util_salvage.c", "src/utilities/util_stat.c", + "src/utilities/util_truncate.c", "src/utilities/util_upgrade.c", "src/utilities/util_verbose.c", "src/utilities/util_verify.c", diff --git a/build_posix/Make.base b/build_posix/Make.base index 9354eb4b183..e5228fac885 100644 --- a/build_posix/Make.base +++ b/build_posix/Make.base @@ -36,6 +36,7 @@ wt_SOURCES =\ src/utilities/util_rename.c \ src/utilities/util_salvage.c \ src/utilities/util_stat.c \ + src/utilities/util_truncate.c \ src/utilities/util_upgrade.c \ src/utilities/util_verbose.c \ src/utilities/util_verify.c \ diff --git a/src/docs/command-line.dox b/src/docs/command-line.dox index 5726a1d19a1..df52324f8f8 100644 --- a/src/docs/command-line.dox +++ b/src/docs/command-line.dox @@ -369,6 +369,19 @@ The following are command-specific options for the \c stat command: Include only "fast" statistics in the output (equivalent to passing statistics=(fast)) to WT_SESSION::open_cursor. +
+@section util_truncate wt truncate +Truncate a table, removing all data. + +The \c truncate command truncates the specified \c uri. It is equivalent to a +call to WT_SESSION::truncate with no start or stop specified. + +@subsection util_truncate_synopsis Synopsis +wt [-RVv] [-C config] [-E secretkey ] [-h directory] truncate uri + +@subsection util_truncate_options Options +The \c truncate command has no command-specific options. +
@section util_upgrade wt upgrade Upgrade a table. diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index 59a299d48a1..4a356f7da61 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -1,6 +1,13 @@ /*! @page upgrading Upgrading WiredTiger applications @section version_292 Upgrading to Version 2.9.2 +
+
WiredTiger Utility now supports truncate
+
+The WiredTiger Utility can now \c truncate an object, removing all contents +from the specified object. +
+
@section version_291 Upgrading to Version 2.9.1 diff --git a/src/utilities/util.h b/src/utilities/util.h index 2658d877b63..cf12d7d4aa6 100644 --- a/src/utilities/util.h +++ b/src/utilities/util.h @@ -49,6 +49,7 @@ int util_rename(WT_SESSION *, int, char *[]); int util_salvage(WT_SESSION *, int, char *[]); int util_stat(WT_SESSION *, int, char *[]); int util_str2recno(WT_SESSION *, const char *p, uint64_t *recnop); +int util_truncate(WT_SESSION *, int, char *[]); int util_upgrade(WT_SESSION *, int, char *[]); int util_verify(WT_SESSION *, int, char *[]); int util_write(WT_SESSION *, int, char *[]); diff --git a/src/utilities/util_main.c b/src/utilities/util_main.c index 1da56adf137..001a66d6d9e 100644 --- a/src/utilities/util_main.c +++ b/src/utilities/util_main.c @@ -175,6 +175,10 @@ main(int argc, char *argv[]) config = "statistics=(all)"; } break; + case 't' : + if (strcmp(command, "truncate") == 0) + func = util_truncate; + break; case 'u': if (strcmp(command, "upgrade") == 0) func = util_upgrade; @@ -272,6 +276,7 @@ usage(void) "\t" "rename\t rename an object\n" "\t" "salvage\t salvage a file\n" "\t" "stat\t display statistics for an object\n" + "\t" "truncate truncate an object, removing all content\n" "\t" "upgrade\t upgrade an object\n" "\t" "verify\t verify an object\n" "\t" "write\t write values to an object\n"); diff --git a/src/utilities/util_truncate.c b/src/utilities/util_truncate.c new file mode 100644 index 00000000000..9325c0d7e84 --- /dev/null +++ b/src/utilities/util_truncate.c @@ -0,0 +1,51 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. 
+ */ + +#include "util.h" + +static int usage(void); + +int +util_truncate(WT_SESSION *session, int argc, char *argv[]) +{ + WT_DECL_RET; + int ch; + char *name; + + while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF) + switch (ch) { + case '?': + default: + return (usage()); + } + + argc -= __wt_optind; + argv += __wt_optind; + + /* The remaining argument is the uri. */ + if (argc != 1) + return (usage()); + if ((name = util_name(session, *argv, "table")) == NULL) + return (1); + + /* Report any failure, then fall through so name is always freed. */ + if ((ret = session->truncate(session, name, NULL, NULL, NULL)) != 0) + ret = util_err(session, ret, "%s: session.truncate", name); + free(name); + return (ret); +} + +static int +usage(void) +{ + (void)fprintf(stderr, + "usage: %s %s " + "truncate uri\n", + progname, usage_prefix); + return (1); +} diff --git a/test/suite/test_util14.py b/test/suite/test_util14.py new file mode 100644 index 00000000000..e2a9f41f0d4 --- /dev/null +++ b/test/suite/test_util14.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law.
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import os +from suite_subprocess import suite_subprocess +import wiredtiger, wttest + +# test_util14.py +# Utilities: wt truncate +class test_util14(wttest.WiredTigerTestCase, suite_subprocess): + tablename = 'test_util14.a' + nentries = 1000 + + def test_truncate_process(self): + """ + Test truncate in a 'wt' process + """ + params = 'key_format=S,value_format=S' + self.session.create('table:' + self.tablename, params) + self.assertTrue(os.path.exists(self.tablename + ".wt")) + cursor = self.session.open_cursor('table:' + self.tablename, None, None) + for i in range(0, self.nentries): + cursor[str(i)] = str(i) + cursor.close() + + self.runWt(["truncate", "table:" + self.tablename]) + + """ + Test to confirm table exists and is empty + """ + outfile="outfile.txt" + errfile="errfile.txt" + self.assertTrue(os.path.exists(self.tablename + ".wt")) + self.runWt(["read", 'table:' + self.tablename, 'NoMatch'], + outfilename=outfile, errfilename=errfile, failure=True) + self.check_empty_file(outfile) + self.check_file_contains(errfile, 'NoMatch: not found\n') + + """ + Tests for error cases + 1. Missing URI + 2. Invalid URI + 3. Valid but incorrect URI + 4. 
Double URI + """ + self.runWt(["truncate"], + outfilename=outfile, errfilename=errfile, failure=True) + self.check_empty_file(outfile) + self.check_file_contains(errfile, 'usage:') + + self.runWt(["truncate", "foobar"], + outfilename=outfile, errfilename=errfile, failure=True) + self.check_empty_file(outfile) + self.check_file_contains(errfile, 'No such file or directory') + + self.runWt(["truncate", 'table:xx' + self.tablename], + outfilename=outfile, errfilename=errfile, failure=True) + self.check_empty_file(outfile) + self.check_file_contains(errfile, 'No such file or directory') + + self.runWt(["truncate", 'table:' + self.tablename, 'table:' + self.tablename], + outfilename=outfile, errfilename=errfile, failure=True) + self.check_empty_file(outfile) + self.check_file_contains(errfile, 'usage:') + +if __name__ == '__main__': + wttest.run() -- cgit v1.2.1 From aa1961b0056db9a3b38243d328b07f2d48d90f3d Mon Sep 17 00:00:00 2001 From: sueloverso Date: Mon, 9 Jan 2017 19:25:52 -0500 Subject: WT-3105 Create all eviction sessions initially to avoid deadlock. (#3237) --- src/evict/evict_lru.c | 32 +++++--------------------------- src/include/connection.h | 2 +- 2 files changed, 6 insertions(+), 28 deletions(-) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index ba8851812cb..948c1e1139e 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -437,18 +437,11 @@ __wt_evict_create(WT_SESSION_IMPL *session) /* * Create the eviction thread group. - * We don't set the group size to the maximum allowed sessions, - * because this may have adverse memory effects. Instead, - * we set the group's maximum to a small value. The code - * that tunes the number of workers will increase the - * maximum if necessary. + * Set the group size to the maximum allowed sessions. 
*/ WT_RET(__wt_thread_group_create(session, &conn->evict_threads, - "eviction-server", conn->evict_threads_min, - WT_MAX(conn->evict_threads_min, - WT_MIN(conn->evict_threads_max, EVICT_GROUP_INCR)), - WT_THREAD_CAN_WAIT | WT_THREAD_PANIC_FAIL, - __wt_evict_thread_run)); + "eviction-server", conn->evict_threads_min, conn->evict_threads_max, + WT_THREAD_CAN_WAIT | WT_THREAD_PANIC_FAIL, __wt_evict_thread_run)); /* * Allow queues to be populated now that the eviction threads @@ -921,7 +914,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) WT_CONNECTION_IMPL *conn; uint64_t cur_threads, delta_msec, delta_pages, i, target_threads; uint64_t pgs_evicted_cur, pgs_evicted_persec_cur; - uint32_t new_max, thread_surplus; + uint32_t thread_surplus; conn = S2C(session); cache = conn->cache; @@ -1033,23 +1026,8 @@ __evict_tune_workers(WT_SESSION_IMPL *session) target_threads = WT_MIN(cur_threads + EVICT_TUNE_BATCH, conn->evict_threads_max); /* - * Resize the group to allow for an additional batch of threads. - * We resize the group in increments of a few sessions. - * Allocating the group to accommodate the maximum number of - * workers has adverse effects on performance due to memory - * effects, so we gradually ramp up the allocation. + * Start the new threads. */ - if (conn->evict_threads.max < target_threads) { - new_max = WT_MIN(conn->evict_threads.max + - EVICT_GROUP_INCR, conn->evict_threads_max); - - WT_RET(__wt_thread_group_resize( - session, &conn->evict_threads, - conn->evict_threads_min, new_max, - WT_THREAD_CAN_WAIT | WT_THREAD_PANIC_FAIL)); - } - - /* Now actually start the new threads. 
*/ for (i = 0; i < (target_threads - cur_threads); ++i) { WT_RET(__wt_thread_group_start_one(session, &conn->evict_threads, false)); diff --git a/src/include/connection.h b/src/include/connection.h index 665275440cf..7d2b78e9f66 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -107,7 +107,7 @@ struct __wt_named_extractor { * Allocate some additional slots for internal sessions so the user cannot * configure too few sessions for us to run. */ -#define WT_EXTRA_INTERNAL_SESSIONS 10 +#define WT_EXTRA_INTERNAL_SESSIONS 20 /* * WT_CONN_CHECK_PANIC -- -- cgit v1.2.1 From c6c95a82915a6f0b96a4c514cf0b29e74bcd2f0a Mon Sep 17 00:00:00 2001 From: David Hows Date: Thu, 12 Jan 2017 16:10:13 +1100 Subject: WT-3110 Add more test cases for the WT command line utility (#3232) --- test/suite/test_util15.py | 71 +++++++++++++++++++++++++++++++++++++++++++++++ test/suite/test_util16.py | 71 +++++++++++++++++++++++++++++++++++++++++++++++ test/suite/test_util17.py | 57 +++++++++++++++++++++++++++++++++++++ 3 files changed, 199 insertions(+) create mode 100644 test/suite/test_util15.py create mode 100644 test/suite/test_util16.py create mode 100644 test/suite/test_util17.py diff --git a/test/suite/test_util15.py b/test/suite/test_util15.py new file mode 100644 index 00000000000..33096e71bee --- /dev/null +++ b/test/suite/test_util15.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. 
We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import os +from suite_subprocess import suite_subprocess +import wiredtiger, wttest + +# test_util15.py +# Utilities: wt alter +class test_util15(wttest.WiredTigerTestCase, suite_subprocess): + tablename = 'test_util15.a' + + def test_alter_process(self): + """ + Test alter in a 'wt' process + """ + params = 'key_format=S,value_format=S' + self.session.create('table:' + self.tablename, params) + self.assertTrue(os.path.exists(self.tablename + ".wt")) + + """ + Alter access pattern and confirm + """ + acc_pat_seq="access_pattern_hint=sequential" + self.runWt(["alter", "table:" + self.tablename, acc_pat_seq]) + cursor = self.session.open_cursor("metadata:create", None, None) + cursor.set_key("table:" + self.tablename) + self.assertEqual(cursor.search(),0) + string = cursor.get_value() + cursor.close() + self.assertTrue(acc_pat_seq in string) + + """ + Alter access pattern again and confirm + """ + acc_pat_rand="access_pattern_hint=random" + self.runWt(["alter", "table:" + self.tablename, acc_pat_rand]) + cursor = self.session.open_cursor("metadata:create", None, None) + cursor.set_key("table:" + self.tablename) + self.assertEqual(cursor.search(),0) + string = cursor.get_value() + cursor.close() + self.assertTrue(acc_pat_rand in string) + +if 
__name__ == '__main__': + wttest.run() diff --git a/test/suite/test_util16.py b/test/suite/test_util16.py new file mode 100644 index 00000000000..00e68c1017a --- /dev/null +++ b/test/suite/test_util16.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+ +import os +from suite_subprocess import suite_subprocess +import wiredtiger, wttest + +# test_util16.py +# Utilities: wt rename +class test_util16(wttest.WiredTigerTestCase, suite_subprocess): + tablename = 'test_util16.a' + tablename2 = 'test_util16.b' + nentries = 1000 + + def test_rename_process(self): + """ + Test alter in a 'wt' process + """ + params = 'key_format=S,value_format=S' + self.session.create('table:' + self.tablename, params) + self.assertTrue(os.path.exists(self.tablename + ".wt")) + cursor = self.session.open_cursor('table:' + self.tablename, None, None) + for i in range(0, self.nentries): + cursor[str(i)] = str(i) + cursor.close() + + self.runWt(["rename", "table:" + self.tablename, "table:" + self.tablename2]) + self.assertTrue(os.path.exists(self.tablename2 + ".wt")) + cursor = self.session.open_cursor('table:' + self.tablename2, None, None) + count = 0 + while cursor.next() == 0: + count +=1 + cursor.close() + self.assertEquals(self.nentries, count) + + self.runWt(["rename", "table:" + self.tablename2, "table:" + self.tablename]) + self.assertTrue(os.path.exists(self.tablename + ".wt")) + cursor = self.session.open_cursor('table:' + self.tablename, None, None) + count = 0 + while cursor.next() == 0: + count +=1 + cursor.close() + self.assertEquals(self.nentries, count) + +if __name__ == '__main__': + wttest.run() diff --git a/test/suite/test_util17.py b/test/suite/test_util17.py new file mode 100644 index 00000000000..decc1fabf1d --- /dev/null +++ b/test/suite/test_util17.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. 
+# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import os +from suite_subprocess import suite_subprocess +import wiredtiger, wttest + +# test_util17.py +# Utilities: wt stat +class test_util17(wttest.WiredTigerTestCase, suite_subprocess): + tablename = 'test_util17.a' + + def test_stat_process(self): + """ + Test stat in a 'wt' process + This test is just here to confirm that stat produces a correct looking + output, it isn't here to do statistics validation. 
+ """ + params = 'key_format=S,value_format=S' + outfile = "wt-stat.out" + expected_string = "cursor: cursor create calls=" + self.session.create('table:' + self.tablename, params) + self.assertTrue(os.path.exists(self.tablename + ".wt")) + self.runWt(["stat"], outfilename=outfile) + self.check_file_contains(outfile, expected_string) + + expected_string = "cache_walk: Entries in the root page=1" + self.runWt(["stat", "table:" + self.tablename ], outfilename=outfile) + self.check_file_contains(outfile, expected_string) + +if __name__ == '__main__': + wttest.run() -- cgit v1.2.1 From 4d85f283c3607ada88922eb7579fb9aa6ee73ba3 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 12 Jan 2017 20:22:40 -0500 Subject: bug: dist/s_all script has misplaced quote causing bad error reporting (#3243) --- dist/s_all | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dist/s_all b/dist/s_all index 4c9d4eccebb..d7a69b08644 100755 --- a/dist/s_all +++ b/dist/s_all @@ -97,10 +97,10 @@ COMMANDS=" 2>&1 ./s_string > ${t_pfx}s_string 2>&1 ./s_tags > ${t_pfx}tags 2>&1 ./s_typedef -c > ${t_pfx}s_typedef_c -2>&1 ./s_void > ${t_pfx}s_void" +2>&1 ./s_void > ${t_pfx}s_void 2>&1 ./s_whitespace > ${t_pfx}s_whitespace 2>&1 ./s_win > ${t_pfx}s_win -2>&1 python style.py > ${t_pfx}py_style +2>&1 python style.py > ${t_pfx}py_style" # Parallelize if possible. xp="" -- cgit v1.2.1 From ec9b2bd417be1fad7484335390385c2a669fc407 Mon Sep 17 00:00:00 2001 From: David Hows Date: Fri, 13 Jan 2017 23:24:33 +1100 Subject: WT-3116 Change s_all to avoid missing error returns on older platforms (#3246) --- dist/s_all | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/dist/s_all b/dist/s_all index d7a69b08644..be33657e640 100755 --- a/dist/s_all +++ b/dist/s_all @@ -57,7 +57,7 @@ errchk() # Some tests shouldn't return an error, we exclude them here. 
case "$1" in *s_export|*s_tags) - break;; + ;; *) errfound=1;; esac @@ -111,14 +111,13 @@ fi echo "$COMMANDS" | xargs $xp -I{} /bin/sh -c {} for f in `find . -name ${t_pfx}\*`; do - if ! `test -s $f`; then - continue + if `test -s $f`; then + LOCAL_NAME=`basename $f` + # Find original command and trim redirect garbage + FAILED_CMD=`echo "$COMMANDS" | grep $LOCAL_NAME | \ + sed -e 's/ >.*//' -e 's/.* //'` + errchk "$FAILED_CMD" $f fi - LOCAL_NAME=`basename $f` - # Find original command and trim redirect garbage - FAILED_CMD=`echo "$COMMANDS" | grep $LOCAL_NAME | \ - sed -e 's/ >.*//' -e 's/.* //'` - errchk "$FAILED_CMD" $f done echo 'dist/s_all run finished' -- cgit v1.2.1 From 36c9a6513bee481a7ef27f0696a88f9b1921c356 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Sun, 15 Jan 2017 20:31:47 -0500 Subject: WT-3114 Avoid archiving log files immediately after recovery. (#3238) --- src/txn/txn_log.c | 16 +++++++++------- test/suite/test_reconfig02.py | 1 + test/suite/test_txn02.py | 4 +++- test/suite/test_txn05.py | 6 +++++- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c index 5f4704b40c4..7ad295f421b 100644 --- a/src/txn/txn_log.c +++ b/src/txn/txn_log.c @@ -368,14 +368,16 @@ __wt_txn_checkpoint_log( /* * If this full checkpoint completed successfully and there is - * no hot backup in progress, tell the logging subsystem the - * checkpoint LSN so that it can archive. Do not update the - * logging checkpoint LSN if this is during a clean connection - * close, only during a full checkpoint. A clean close may not - * update any metadata LSN and we do not want to archive in - * that case. + * no hot backup in progress and this is not recovery, tell + * the logging subsystem the checkpoint LSN so that it can + * archive. Do not update the logging checkpoint LSN if this + * is during a clean connection close, only during a full + * checkpoint. 
A clean close may not update any metadata LSN + * and we do not want to archive in that case. */ - if (!S2C(session)->hot_backup && txn->full_ckpt) + if (!S2C(session)->hot_backup && + !F_ISSET(S2C(session), WT_CONN_RECOVERING) && + txn->full_ckpt) __wt_log_ckpt(session, ckpt_lsn); /* FALLTHROUGH */ diff --git a/test/suite/test_reconfig02.py b/test/suite/test_reconfig02.py index 36a78a1805f..8054b2a6ab5 100644 --- a/test/suite/test_reconfig02.py +++ b/test/suite/test_reconfig02.py @@ -109,6 +109,7 @@ class test_reconfig02(wttest.WiredTigerTestCase): # Now turn on archive, sleep a bit to allow the archive thread # to run and then confirm that all original logs are gone. self.conn.reconfigure("log=(archive=true)") + self.session.checkpoint("force") time.sleep(2) cur_logs = fnmatch.filter(os.listdir('.'), "*Log*") for o in orig_logs: diff --git a/test/suite/test_txn02.py b/test/suite/test_txn02.py index a0c2c12a47c..7c2a58516bc 100644 --- a/test/suite/test_txn02.py +++ b/test/suite/test_txn02.py @@ -176,8 +176,10 @@ class test_txn02(wttest.WiredTigerTestCase, suite_subprocess): backup_conn = self.wiredtiger_open(self.backup_dir, backup_conn_params) try: - self.check(backup_conn.open_session(), None, committed) + session = backup_conn.open_session() finally: + session.checkpoint("force") + self.check(backup_conn.open_session(), None, committed) # Sleep long enough so that the archive thread is guaranteed # to run before we close the connection. 
time.sleep(1.0) diff --git a/test/suite/test_txn05.py b/test/suite/test_txn05.py index 9e84fe7d3fe..5913c4688a3 100644 --- a/test/suite/test_txn05.py +++ b/test/suite/test_txn05.py @@ -139,8 +139,12 @@ class test_txn05(wttest.WiredTigerTestCase, suite_subprocess): backup_conn = self.wiredtiger_open(self.backup_dir, backup_conn_params) try: - self.check(backup_conn.open_session(), None, committed) + session = backup_conn.open_session() finally: + self.check(session, None, committed) + # Force a checkpoint because we don't record the recovery + # checkpoint as available for archiving. + session.checkpoint("force") # Sleep long enough so that the archive thread is guaranteed # to run before we close the connection. time.sleep(1.0) -- cgit v1.2.1 From 67f96585500a67236e6df2d633acf64dfe16fe5f Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Sun, 15 Jan 2017 22:21:24 -0500 Subject: WT-3121 In the test suite, create a standard way to load extensions (#3241) * In the test suite, create a standard way to load extensions. Most examples of overriding setUpConnectionOpen() can now be handled by a combination of conn_config (as variable or method) and conn_extensions (as variable or method). * conn_config when defined as a method, now only takes the self argument, clean up the callers. * Refactored several more tests to use conn_config() in favor of overriding setUpConnectionOpen(). 
--- test/suite/run.py | 3 +- test/suite/test_async01.py | 2 +- test/suite/test_async02.py | 2 +- test/suite/test_backup04.py | 2 +- test/suite/test_bug011.py | 2 +- test/suite/test_collator.py | 29 +--------------- test/suite/test_compress01.py | 20 +++-------- test/suite/test_cursor07.py | 2 +- test/suite/test_cursor08.py | 20 +++-------- test/suite/test_encrypt01.py | 39 +++++---------------- test/suite/test_encrypt02.py | 46 ++++++------------------ test/suite/test_encrypt03.py | 36 ++++--------------- test/suite/test_encrypt04.py | 42 +++++++--------------- test/suite/test_encrypt05.py | 39 +++++---------------- test/suite/test_encrypt06.py | 38 ++++---------------- test/suite/test_encrypt07.py | 33 +++--------------- test/suite/test_join03.py | 28 +-------------- test/suite/test_join04.py | 28 +-------------- test/suite/test_join07.py | 28 +-------------- test/suite/test_readonly01.py | 3 +- test/suite/test_schema05.py | 28 +-------------- test/suite/test_schema07.py | 3 +- test/suite/test_stat02.py | 2 +- test/suite/test_txn02.py | 22 +++++------- test/suite/test_txn04.py | 22 +++++------- test/suite/test_txn05.py | 22 +++++------- test/suite/test_txn06.py | 4 +-- test/suite/test_txn07.py | 53 +++++++++------------------- test/suite/test_txn08.py | 2 +- test/suite/test_txn09.py | 17 +++------ test/suite/test_txn11.py | 2 +- test/suite/test_txn13.py | 2 +- test/suite/test_txn15.py | 2 +- test/suite/wttest.py | 81 +++++++++++++++++++++++++++++++++++++++---- 34 files changed, 207 insertions(+), 497 deletions(-) diff --git a/test/suite/run.py b/test/suite/run.py index ba6d9f78503..97c58bfdccf 100644 --- a/test/suite/run.py +++ b/test/suite/run.py @@ -324,7 +324,8 @@ if __name__ == '__main__': # All global variables should be set before any test classes are loaded. # That way, verbose printing can be done at the class definition level. 
wttest.WiredTigerTestCase.globalSetup(preserve, timestamp, gdbSub, - verbose, dirarg, longtest) + verbose, wt_builddir, dirarg, + longtest) # Without any tests listed as arguments, do discovery if len(testargs) == 0: diff --git a/test/suite/test_async01.py b/test/suite/test_async01.py index cbb3dad8de6..158c16a9381 100644 --- a/test/suite/test_async01.py +++ b/test/suite/test_async01.py @@ -132,7 +132,7 @@ class test_async01(wttest.WiredTigerTestCase, suite_subprocess): ]) # Enable async for this test. - def conn_config(self, dir): + def conn_config(self): return 'async=(enabled=true,ops_max=%s,' % self.async_ops + \ 'threads=%s)' % self.async_threads diff --git a/test/suite/test_async02.py b/test/suite/test_async02.py index 50652da6dfd..28435fe85b2 100644 --- a/test/suite/test_async02.py +++ b/test/suite/test_async02.py @@ -129,7 +129,7 @@ class test_async02(wttest.WiredTigerTestCase, suite_subprocess): ]) # Enable async for this test. - def conn_config(self, dir): + def conn_config(self): return 'async=(enabled=true,ops_max=%s,' % self.async_ops + \ 'threads=%s)' % self.async_threads diff --git a/test/suite/test_backup04.py b/test/suite/test_backup04.py index 919649fed57..be52a5e1e97 100644 --- a/test/suite/test_backup04.py +++ b/test/suite/test_backup04.py @@ -60,7 +60,7 @@ class test_backup_target(wttest.WiredTigerTestCase, suite_subprocess): ]) # Create a large cache, otherwise this test runs quite slowly. - def conn_config(self, dir): + def conn_config(self): return 'cache_size=1G,log=(archive=false,enabled,file_max=%s)' % \ self.logmax diff --git a/test/suite/test_bug011.py b/test/suite/test_bug011.py index 969aaeb5b39..5e0721b93f1 100644 --- a/test/suite/test_bug011.py +++ b/test/suite/test_bug011.py @@ -43,7 +43,7 @@ class test_bug011(wttest.WiredTigerTestCase): nrows = 10000 nops = 10000 # Add connection configuration for this test. 
- def conn_config(self, dir): + def conn_config(self): return 'cache_size=1GB' @wttest.longtest("Eviction copes with lots of files") diff --git a/test/suite/test_collator.py b/test/suite/test_collator.py index 3fae4ff47cb..e7be557335e 100644 --- a/test/suite/test_collator.py +++ b/test/suite/test_collator.py @@ -48,34 +48,7 @@ class test_collator(wttest.WiredTigerTestCase): nentries = 100 nindices = 4 - # Return the wiredtiger_open extension argument for a shared library. - def extensionArg(self, exts): - extfiles = [] - for ext in exts: - (dirname, name, libname) = ext - if name != None and name != 'none': - testdir = os.path.dirname(__file__) - extdir = os.path.join(run.wt_builddir, 'ext', dirname) - extfile = os.path.join( - extdir, name, '.libs', 'libwiredtiger_' + libname + '.so') - if not os.path.exists(extfile): - self.skipTest('extension "' + extfile + '" not built') - if not extfile in extfiles: - extfiles.append(extfile) - if len(extfiles) == 0: - return '' - else: - return ',extensions=["' + '","'.join(extfiles) + '"]' - - # Override WiredTigerTestCase, we have extensions. - def setUpConnectionOpen(self, dir): - extarg = self.extensionArg([('extractors', 'csv', 'csv_extractor'), - ('collators', 'revint', 'revint_collator')]) - connarg = 'create,error_prefix="{0}: ",{1}'.format( - self.shortid(), extarg) - conn = self.wiredtiger_open(dir, connarg) - self.pr(`conn`) - return conn + conn_extensions = [ 'extractors/csv', 'collators/revint' ] def create_indices(self): # Create self.nindices index files, each with a column from the CSV diff --git a/test/suite/test_compress01.py b/test/suite/test_compress01.py index 606f7b63235..ef1064d294e 100644 --- a/test/suite/test_compress01.py +++ b/test/suite/test_compress01.py @@ -51,22 +51,10 @@ class test_compress01(wttest.WiredTigerTestCase): nrecords = 10000 bigvalue = "abcdefghij" * 1000 - # Load the compression extension, compression is enabled elsewhere. 
- def conn_config(self, dir): - return self.extensionArg(self.compress) - - # Return the wiredtiger_open extension argument for a shared library. - def extensionArg(self, name): - if name == None: - return '' - - testdir = os.path.dirname(__file__) - extdir = os.path.join(run.wt_builddir, 'ext/compressors') - extfile = os.path.join( - extdir, name, '.libs', 'libwiredtiger_' + name + '.so') - if not os.path.exists(extfile): - self.skipTest('compression extension "' + extfile + '" not built') - return ',extensions=["' + extfile + '"]' + # Load the compression extension, skip the test if missing + def conn_extensions(self, extlist): + extlist.skip_if_missing = True + extlist.extension('compressors', self.compress) # Create a table, add keys with both big and small values, then verify them. def test_compress(self): diff --git a/test/suite/test_cursor07.py b/test/suite/test_cursor07.py index d6078183fc1..19db718fd11 100644 --- a/test/suite/test_cursor07.py +++ b/test/suite/test_cursor07.py @@ -49,7 +49,7 @@ class test_cursor07(wttest.WiredTigerTestCase, suite_subprocess): ('reopen', dict(reopen=True)) ]) # Enable logging for this test. - def conn_config(self, dir): + def conn_config(self): return 'log=(archive=false,enabled,file_max=%s),' % self.logmax + \ 'transaction_sync="(method=dsync,enabled)"' diff --git a/test/suite/test_cursor08.py b/test/suite/test_cursor08.py index 3f8f50defa7..cc76f528aa9 100644 --- a/test/suite/test_cursor08.py +++ b/test/suite/test_cursor08.py @@ -54,24 +54,14 @@ class test_cursor08(wttest.WiredTigerTestCase, suite_subprocess): ] scenarios = make_scenarios(reopens, compress) # Load the compression extension, and enable it for logging. 
- def conn_config(self, dir): + def conn_config(self): return 'log=(archive=false,enabled,file_max=%s,' % self.logmax + \ 'compressor=%s),' % self.compress + \ - 'transaction_sync="(method=dsync,enabled)",' + \ - self.extensionArg(self.compress) + 'transaction_sync="(method=dsync,enabled)"' - # Return the wiredtiger_open extension argument for a shared library. - def extensionArg(self, name): - if name == None or name == 'none': - return '' - - testdir = os.path.dirname(__file__) - extdir = os.path.join(run.wt_builddir, 'ext/compressors') - extfile = os.path.join( - extdir, name, '.libs', 'libwiredtiger_' + name + '.so') - if not os.path.exists(extfile): - self.skipTest('compression extension "' + extfile + '" not built') - return ',extensions=["' + extfile + '"]' + def conn_extensions(self, extlist): + extlist.skip_if_missing = True + extlist.extension('compressors', self.compress) def test_log_cursor(self): # print "Creating %s with config '%s'" % (self.uri, self.create_params) diff --git a/test/suite/test_encrypt01.py b/test/suite/test_encrypt01.py index 746c9d13e96..317bed93246 100644 --- a/test/suite/test_encrypt01.py +++ b/test/suite/test_encrypt01.py @@ -66,41 +66,20 @@ class test_encrypt01(wttest.WiredTigerTestCase): nrecords = 5000 bigvalue = "abcdefghij" * 1001 # len(bigvalue) = 10010 - # Override WiredTigerTestCase, we have extensions. 
- def setUpConnectionOpen(self, dir): + def conn_extensions(self, extlist): + extlist.skip_if_missing = True + extlist.extension('encryptors', self.sys_encrypt) + extlist.extension('encryptors', self.file_encrypt) + extlist.extension('compressors', self.block_compress) + extlist.extension('compressors', self.log_compress) + + def conn_config(self): encarg = 'encryption=(name={0}{1}),'.format( self.sys_encrypt, self.sys_encrypt_args) comparg = '' if self.log_compress != None: comparg='log=(compressor={0}),'.format(self.log_compress) - extarg = self.extensionArg([('encryptors', self.sys_encrypt), - ('encryptors', self.file_encrypt), - ('compressors', self.block_compress), - ('compressors', self.log_compress)]) - conn = self.wiredtiger_open(dir, - 'create,error_prefix="{0}: ",{1}{2}{3}'.format( - self.shortid(), encarg, comparg, extarg)) - self.pr(`conn`) - return conn - - # Return the wiredtiger_open extension argument for a shared library. - def extensionArg(self, exts): - extfiles = [] - for ext in exts: - (dirname, name) = ext - if name != None and name != 'none': - testdir = os.path.dirname(__file__) - extdir = os.path.join(run.wt_builddir, 'ext', dirname) - extfile = os.path.join( - extdir, name, '.libs', 'libwiredtiger_' + name + '.so') - if not os.path.exists(extfile): - self.skipTest('extension "' + extfile + '" not built') - if not extfile in extfiles: - extfiles.append(extfile) - if len(extfiles) == 0: - return '' - else: - return ',extensions=["' + '","'.join(extfiles) + '"]' + return encarg + comparg # Create a table, add keys with both big and small values, then verify them. 
def test_encrypt(self): diff --git a/test/suite/test_encrypt02.py b/test/suite/test_encrypt02.py index 648686274c4..2d3b8a29b13 100644 --- a/test/suite/test_encrypt02.py +++ b/test/suite/test_encrypt02.py @@ -39,51 +39,25 @@ from wtscenario import make_scenarios class test_encrypt02(wttest.WiredTigerTestCase, suite_subprocess): uri = 'file:test_encrypt02' encrypt_type = [ - ('noarg', dict( encrypt='rotn', encrypt_args='name=rotn', - secret_arg=None)), - ('keyid', dict( encrypt='rotn', encrypt_args='name=rotn,keyid=11', - secret_arg=None)), - ('pass', dict( encrypt='rotn', encrypt_args='name=rotn', - secret_arg='ABC')), - ('keyid-pass', dict( encrypt='rotn', encrypt_args='name=rotn,keyid=11', - secret_arg='ABC')), + ('noarg', dict( conn_extensions=[ 'encryptors/rotn' ], + encrypt_args='name=rotn', secret_arg=None)), + ('keyid', dict( conn_extensions=[ 'encryptors/rotn' ], + encrypt_args='name=rotn,keyid=11', secret_arg=None)), + ('pass', dict( conn_extensions=[ 'encryptors/rotn' ], + encrypt_args='name=rotn', secret_arg='ABC')), + ('keyid-pass', dict( conn_extensions=[ 'encryptors/rotn' ], + encrypt_args='name=rotn,keyid=11', secret_arg='ABC')), ] scenarios = make_scenarios(encrypt_type) nrecords = 5000 bigvalue = "abcdefghij" * 1001 # len(bigvalue) = 10010 - # Return the wiredtiger_open extension argument for a shared library. - def extensionArg(self, exts): - extfiles = [] - for ext in exts: - (dirname, name) = ext - if name != None and name != 'none': - testdir = os.path.dirname(__file__) - extdir = os.path.join(run.wt_builddir, 'ext', dirname) - extfile = os.path.join( - extdir, name, '.libs', 'libwiredtiger_' + name + '.so') - if not os.path.exists(extfile): - self.skipTest('extension "' + extfile + '" not built') - if not extfile in extfiles: - extfiles.append(extfile) - if len(extfiles) == 0: - return '' - else: - return ',extensions=["' + '","'.join(extfiles) + '"]' - - # Override WiredTigerTestCase, we have extensions. 
- def setUpConnectionOpen(self, dir): + def conn_config(self): secretarg = '' if self.secret_arg != None: secretarg = ',secretkey=' + self.secret_arg - encarg = 'encryption=({0}{1})'.format(self.encrypt_args, secretarg) - extarg = self.extensionArg([('encryptors', self.encrypt)]) - connarg = 'create,error_prefix="{0}: ",{1},{2}'.format( - self.shortid(), encarg, extarg) - conn = self.wiredtiger_open(dir, connarg) - self.pr(`conn`) - return conn + return 'encryption=({0}{1})'.format(self.encrypt_args, secretarg) # Create a table, add keys with both big and small values, then verify them. def test_pass(self): diff --git a/test/suite/test_encrypt03.py b/test/suite/test_encrypt03.py index cf459190637..0809c16c6d1 100644 --- a/test/suite/test_encrypt03.py +++ b/test/suite/test_encrypt03.py @@ -50,37 +50,13 @@ class test_encrypt03(wttest.WiredTigerTestCase): ] scenarios = make_scenarios(types, encrypt) - # Override WiredTigerTestCase, we have extensions. - def setUpConnectionOpen(self, dir): - encarg = 'encryption=(name={0}{1}),'.format( - self.sys_encrypt, self.sys_encrypt_args) - extarg = self.extensionArg([('encryptors', self.sys_encrypt), - ('encryptors', self.file_encrypt)]) - self.pr('encarg = ' + encarg + ' extarg = ' + extarg) - conn = self.wiredtiger_open(dir, - 'create,error_prefix="{0}: ",{1}{2}'.format( - self.shortid(), encarg, extarg)) - self.pr(`conn`) - return conn + def conn_extensions(self, extlist): + extlist.extension('encryptors', self.sys_encrypt) + extlist.extension('encryptors', self.file_encrypt) - # Return the wiredtiger_open extension argument for a shared library. 
- def extensionArg(self, exts): - extfiles = [] - for ext in exts: - (dirname, name) = ext - if name != None and name != 'none': - testdir = os.path.dirname(__file__) - extdir = os.path.join(run.wt_builddir, 'ext', dirname) - extfile = os.path.join( - extdir, name, '.libs', 'libwiredtiger_' + name + '.so') - if not os.path.exists(extfile): - self.skipTest('extension "' + extfile + '" not built') - if not extfile in extfiles: - extfiles.append(extfile) - if len(extfiles) == 0: - return '' - else: - return ',extensions=["' + '","'.join(extfiles) + '"]' + def conn_config(self): + return 'encryption=(name={0}{1}),'.format( + self.sys_encrypt, self.sys_encrypt_args) # Create a table with encryption values that are in error. def test_encrypt(self): diff --git a/test/suite/test_encrypt04.py b/test/suite/test_encrypt04.py index a244cf97961..a10e6c28831 100644 --- a/test/suite/test_encrypt04.py +++ b/test/suite/test_encrypt04.py @@ -77,9 +77,15 @@ class test_encrypt04(wttest.WiredTigerTestCase, suite_subprocess): wttest.WiredTigerTestCase.__init__(self, *args, **kwargs) self.part = 1 + def conn_extensions(self, extlist): + extarg = None + if self.expect_forceerror: + extarg='(config=\"rotn_force_error=true\")' + extlist.extension('encryptors', self.name, extarg) + # Override WiredTigerTestCase, we have extensions. 
def setUpConnectionOpen(self, dir): - forceerror = None + self.expect_forceerror = False if self.part == 1: self.name = self.name1 self.keyid = self.keyid1 @@ -93,16 +99,15 @@ class test_encrypt04(wttest.WiredTigerTestCase, suite_subprocess): self.fileinclear = self.fileinclear2 if \ hasattr(self, 'fileinclear2') else False if hasattr(self, 'forceerror1') and hasattr(self, 'forceerror2'): - forceerror = "rotn_force_error=true" - self.expect_forceerror = forceerror != None + self.expect_forceerror = True self.got_forceerror = False encarg = 'encryption=(name={0},keyid={1},secretkey={2}),'.format( self.name, self.keyid, self.secretkey) - # If forceerror is set for this test, add a config arg to - # the extension string. That signals rotn to return a (-1000) - # error code, which we'll detect here. - extarg = self.extensionArg([('encryptors', self.name, forceerror)]) + # If forceerror is set for this test, conn_extensions adds a + # config arg to the extension string. That signals rotn to + # return a (-1000) error code, which we'll detect here. + extarg = self.extensionsConfig() self.pr('encarg = ' + encarg + ' extarg = ' + extarg) completed = False try: @@ -135,29 +140,6 @@ class test_encrypt04(wttest.WiredTigerTestCase, suite_subprocess): self.assertEqual(cursor.search(), 0) self.assertEquals(cursor.get_value(), val) - # Return the wiredtiger_open extension argument for a shared library. 
- def extensionArg(self, exts): - extfiles = [] - for ext in exts: - (dirname, name, extarg) = ext - if name != None and name != 'none': - testdir = os.path.dirname(__file__) - extdir = os.path.join(run.wt_builddir, 'ext', dirname) - extfile = os.path.join( - extdir, name, '.libs', 'libwiredtiger_' + name + '.so') - if not os.path.exists(extfile): - self.skipTest('extension "' + extfile + '" not built') - extfile = '"' + extfile + '"' - if not extfile in extfiles: - s = extfile - if extarg != None: - s += "=(config=\"" + extarg + "\")" - extfiles.append(s) - if len(extfiles) == 0: - return '' - else: - return ',extensions=[' + ','.join(extfiles) + ']' - # Evaluate expression, which either must succeed (if expect_okay) # or must fail (if !expect_okay). def check_okay(self, expect_okay, expr): diff --git a/test/suite/test_encrypt05.py b/test/suite/test_encrypt05.py index 19a3522b3d5..d8862321821 100644 --- a/test/suite/test_encrypt05.py +++ b/test/suite/test_encrypt05.py @@ -49,41 +49,20 @@ class test_encrypt05(wttest.WiredTigerTestCase): nrecords = 500 bigvalue = 'a' * 500 # we use values that will definitely give compression - # Override WiredTigerTestCase, we have extensions. 
- def setUpConnectionOpen(self, dir): + def conn_extensions(self, extlist): + extlist.skip_if_missing = True + extlist.extension('encryptors', self.sys_encrypt) + extlist.extension('encryptors', self.file_encrypt) + extlist.extension('compressors', self.block_compress) + extlist.extension('compressors', self.log_compress) + + def conn_config(self): encarg = 'encryption=(name={0}{1}),'.format( self.sys_encrypt, self.sys_encrypt_args) comparg = '' if self.log_compress != None: comparg='log=(compressor={0}),'.format(self.log_compress) - extarg = self.extensionArg([('encryptors', self.sys_encrypt), - ('encryptors', self.file_encrypt), - ('compressors', self.block_compress), - ('compressors', self.log_compress)]) - conn = self.wiredtiger_open(dir, - 'create,error_prefix="{0}: ",{1}{2}{3}'.format( - self.shortid(), encarg, comparg, extarg)) - self.pr(`conn`) - return conn - - # Return the wiredtiger_open extension argument for a shared library. - def extensionArg(self, exts): - extfiles = [] - for ext in exts: - (dirname, name) = ext - if name != None and name != 'none': - testdir = os.path.dirname(__file__) - extdir = os.path.join(run.wt_builddir, 'ext', dirname) - extfile = os.path.join( - extdir, name, '.libs', 'libwiredtiger_' + name + '.so') - if not os.path.exists(extfile): - self.skipTest('extension "' + extfile + '" not built') - if not extfile in extfiles: - extfiles.append(extfile) - if len(extfiles) == 0: - return '' - else: - return ',extensions=["' + '","'.join(extfiles) + '"]' + return encarg + comparg def getvalue(self, r, n): if n < len(self.bigvalue): diff --git a/test/suite/test_encrypt06.py b/test/suite/test_encrypt06.py index 893c4ba3095..3dd7ac17eff 100644 --- a/test/suite/test_encrypt06.py +++ b/test/suite/test_encrypt06.py @@ -89,38 +89,14 @@ class test_encrypt06(wttest.WiredTigerTestCase): scenarios = make_scenarios(encrypt, storagetype) nrecords = 1000 - # Override WiredTigerTestCase, we have extensions. 
- def setUpConnectionOpen(self, dir): - encarg = 'encryption=(name={0}{1}),'.format( + def conn_extensions(self, extlist): + extlist.extension('encryptors', self.sys_encrypt) + extlist.extension('encryptors', self.file0_encrypt) + extlist.extension('encryptors', self.file1_encrypt) + + def conn_config(self): + return 'encryption=(name={0}{1}),'.format( self.sys_encrypt, self.sys_encrypt_args) - comparg = '' - extarg = self.extensionArg([('encryptors', self.sys_encrypt), - ('encryptors', self.file0_encrypt), - ('encryptors', self.file1_encrypt)]) - self.open_params = 'create,error_prefix="{0}: ",{1}{2}{3}'.format( - self.shortid(), encarg, comparg, extarg) - conn = self.wiredtiger_open(dir, self.open_params) - self.pr(`conn`) - return conn - - # Return the wiredtiger_open extension argument for a shared library. - def extensionArg(self, exts): - extfiles = [] - for ext in exts: - (dirname, name) = ext - if name != None and name != 'none': - testdir = os.path.dirname(__file__) - extdir = os.path.join(run.wt_builddir, 'ext', dirname) - extfile = os.path.join( - extdir, name, '.libs', 'libwiredtiger_' + name + '.so') - if not os.path.exists(extfile): - self.skipTest('extension "' + extfile + '" not built') - if not extfile in extfiles: - extfiles.append(extfile) - if len(extfiles) == 0: - return '' - else: - return ',extensions=["' + '","'.join(extfiles) + '"]' def encrypt_file_params(self, name, args): if name == None: diff --git a/test/suite/test_encrypt07.py b/test/suite/test_encrypt07.py index 97ab1987d4f..1c342783353 100644 --- a/test/suite/test_encrypt07.py +++ b/test/suite/test_encrypt07.py @@ -44,35 +44,12 @@ class test_encrypt07(test_salvage.test_salvage): nrecords = 5000 bigvalue = "abcdefghij" * 1007 # len(bigvalue) = 10070 - # Override WiredTigerTestCase, we have extensions. 
- def setUpConnectionOpen(self, dir): - encarg = 'encryption=(name={0}{1}),'.format( - self.sys_encrypt, self.sys_encrypt_args) - extarg = self.extensionArg([('encryptors', self.sys_encrypt)]) - conn = self.wiredtiger_open(dir, - 'create,error_prefix="{0}: ",{1}{2}'.format( - self.shortid(), encarg, extarg)) - self.pr(`conn`) - return conn + def conn_extensions(self, extlist): + extlist.extension('encryptors', self.sys_encrypt) - # Return the wiredtiger_open extension argument for a shared library. - def extensionArg(self, exts): - extfiles = [] - for ext in exts: - (dirname, name) = ext - if name != None and name != 'none': - testdir = os.path.dirname(__file__) - extdir = os.path.join(run.wt_builddir, 'ext', dirname) - extfile = os.path.join( - extdir, name, '.libs', 'libwiredtiger_' + name + '.so') - if not os.path.exists(extfile): - self.skipTest('extension "' + extfile + '" not built') - if not extfile in extfiles: - extfiles.append(extfile) - if len(extfiles) == 0: - return '' - else: - return ',extensions=["' + '","'.join(extfiles) + '"]' + def conn_config(self): + return 'encryption=(name={0}{1}),'.format( + self.sys_encrypt, self.sys_encrypt_args) def rot13(self, s): return codecs.encode(s, 'rot_13') diff --git a/test/suite/test_join03.py b/test/suite/test_join03.py index edab7146a6b..fe47b75f99b 100644 --- a/test/suite/test_join03.py +++ b/test/suite/test_join03.py @@ -36,33 +36,7 @@ class test_join03(wttest.WiredTigerTestCase): table_name1 = 'test_join03' nentries = 100 - # Return the wiredtiger_open extension argument for a shared library. 
- def extensionArg(self, exts): - extfiles = [] - for ext in exts: - (dirname, name, libname) = ext - if name != None and name != 'none': - testdir = os.path.dirname(__file__) - extdir = os.path.join(run.wt_builddir, 'ext', dirname) - extfile = os.path.join( - extdir, name, '.libs', 'libwiredtiger_' + libname + '.so') - if not os.path.exists(extfile): - self.skipTest('extension "' + extfile + '" not built') - if not extfile in extfiles: - extfiles.append(extfile) - if len(extfiles) == 0: - return '' - else: - return ',extensions=["' + '","'.join(extfiles) + '"]' - - # Override WiredTigerTestCase, we have extensions. - def setUpConnectionOpen(self, dir): - extarg = self.extensionArg([('extractors', 'csv', 'csv_extractor')]) - connarg = 'create,error_prefix="{0}: ",{1}'.format( - self.shortid(), extarg) - conn = self.wiredtiger_open(dir, connarg) - self.pr(`conn`) - return conn + conn_extensions = [ 'extractors/csv' ] def gen_key(self, i): return [ i + 1 ] diff --git a/test/suite/test_join04.py b/test/suite/test_join04.py index a71418d9f05..4190f299676 100644 --- a/test/suite/test_join04.py +++ b/test/suite/test_join04.py @@ -36,33 +36,7 @@ class test_join04(wttest.WiredTigerTestCase): table_name1 = 'test_join04' nentries = 100 - # Return the wiredtiger_open extension argument for a shared library. - def extensionArg(self, exts): - extfiles = [] - for ext in exts: - (dirname, name, libname) = ext - if name != None and name != 'none': - testdir = os.path.dirname(__file__) - extdir = os.path.join(run.wt_builddir, 'ext', dirname) - extfile = os.path.join( - extdir, name, '.libs', 'libwiredtiger_' + libname + '.so') - if not os.path.exists(extfile): - self.skipTest('extension "' + extfile + '" not built') - if not extfile in extfiles: - extfiles.append(extfile) - if len(extfiles) == 0: - return '' - else: - return ',extensions=["' + '","'.join(extfiles) + '"]' - - # Override WiredTigerTestCase, we have extensions. 
- def setUpConnectionOpen(self, dir): - extarg = self.extensionArg([('extractors', 'csv', 'csv_extractor')]) - connarg = 'create,error_prefix="{0}: ",{1}'.format( - self.shortid(), extarg) - conn = self.wiredtiger_open(dir, connarg) - self.pr(`conn`) - return conn + conn_extensions = [ 'extractors/csv' ] # JIRA WT-2308: # Test extractors with equality joins diff --git a/test/suite/test_join07.py b/test/suite/test_join07.py index 2a32e678d72..6a31970250f 100644 --- a/test/suite/test_join07.py +++ b/test/suite/test_join07.py @@ -200,33 +200,7 @@ class test_join07(wttest.WiredTigerTestCase): scenarios = make_scenarios(extractscen) - # Return the wiredtiger_open extension argument for a shared library. - def extensionArg(self, exts): - extfiles = [] - for ext in exts: - (dirname, name, libname) = ext - if name != None and name != 'none': - testdir = os.path.dirname(__file__) - extdir = os.path.join(run.wt_builddir, 'ext', dirname) - extfile = os.path.join( - extdir, name, '.libs', 'libwiredtiger_' + libname + '.so') - if not os.path.exists(extfile): - self.skipTest('extension "' + extfile + '" not built') - if not extfile in extfiles: - extfiles.append(extfile) - if len(extfiles) == 0: - return '' - else: - return ',extensions=["' + '","'.join(extfiles) + '"]' - - # Override WiredTigerTestCase, we have extensions. 
- def setUpConnectionOpen(self, dir): - extarg = self.extensionArg([('extractors', 'csv', 'csv_extractor')]) - connarg = 'create,error_prefix="{0}: ",{1}'.format( - self.shortid(), extarg) - conn = self.wiredtiger_open(dir, connarg) - self.pr(`conn`) - return conn + conn_extensions = [ 'extractors/csv' ] def expect(self, token, expected): if token == None or token.kind not in expected: diff --git a/test/suite/test_readonly01.py b/test/suite/test_readonly01.py index e4b431ca1da..f41280a3283 100644 --- a/test/suite/test_readonly01.py +++ b/test/suite/test_readonly01.py @@ -75,8 +75,7 @@ class test_readonly01(wttest.WiredTigerTestCase, suite_subprocess): scenarios = make_scenarios(basecfg_list, dir_list, log_list, types) - def conn_config(self, dir): - self.home = dir + def conn_config(self): params = \ 'error_prefix="%s",' % self.shortid() + \ '%s' % self.logcfg + \ diff --git a/test/suite/test_schema05.py b/test/suite/test_schema05.py index 28ad51b3c92..bb3d4f49006 100644 --- a/test/suite/test_schema05.py +++ b/test/suite/test_schema05.py @@ -57,33 +57,7 @@ class test_schema05(wttest.WiredTigerTestCase): ('index-after', { 'create_index' : 2 }), ]) - # Return the wiredtiger_open extension argument for a shared library. - def extensionArg(self, exts): - extfiles = [] - for ext in exts: - (dirname, name, libname) = ext - if name != None and name != 'none': - testdir = os.path.dirname(__file__) - extdir = os.path.join(run.wt_builddir, 'ext', dirname) - extfile = os.path.join( - extdir, name, '.libs', 'libwiredtiger_' + libname + '.so') - if not os.path.exists(extfile): - self.skipTest('extension "' + extfile + '" not built') - if not extfile in extfiles: - extfiles.append(extfile) - if len(extfiles) == 0: - return '' - else: - return ',extensions=["' + '","'.join(extfiles) + '"]' - - # Override WiredTigerTestCase, we have extensions. 
- def setUpConnectionOpen(self, dir): - extarg = self.extensionArg([('extractors', 'csv', 'csv_extractor')]) - connarg = 'create,error_prefix="{0}: ",{1}'.format( - self.shortid(), extarg) - conn = self.wiredtiger_open(dir, connarg) - self.pr(`conn`) - return conn + conn_extensions = [ 'extractors/csv' ] def create_indices(self): # Create self.nindices index files, each with a column from the CSV diff --git a/test/suite/test_schema07.py b/test/suite/test_schema07.py index ac397c6e1a1..3e4b1d28a4d 100644 --- a/test/suite/test_schema07.py +++ b/test/suite/test_schema07.py @@ -33,8 +33,7 @@ import wiredtiger, wttest class test_schema07(wttest.WiredTigerTestCase): tablename = 'table:test_schema07' - def conn_config(self, dir): - return 'cache_size=10MB' + conn_config = 'cache_size=10MB' @wttest.longtest("Creating many tables shouldn't fill the cache") def test_many_tables(self): diff --git a/test/suite/test_stat02.py b/test/suite/test_stat02.py index cecda7f1ddc..45af283ed02 100644 --- a/test/suite/test_stat02.py +++ b/test/suite/test_stat02.py @@ -59,7 +59,7 @@ class test_stat_cursor_config(wttest.WiredTigerTestCase): scenarios = make_scenarios(uri, data_config, cursor_config) # Turn on statistics for this test. - def conn_config(self, dir): + def conn_config(self): return 'statistics=(%s)' % self.data_config # For each database/cursor configuration, confirm the right combinations diff --git a/test/suite/test_txn02.py b/test/suite/test_txn02.py index 7c2a58516bc..01626057b9e 100644 --- a/test/suite/test_txn02.py +++ b/test/suite/test_txn02.py @@ -93,11 +93,10 @@ class test_txn02(wttest.WiredTigerTestCase, suite_subprocess): checklog_calls = 100 if wttest.islongtest() else 2 checklog_mod = (len(scenarios) / checklog_calls + 1) - def setUpConnectionOpen(self, dir): - self.home = dir + def conn_config(self): # Cycle through the different transaction_sync values in a # deterministic manner. 
- self.txn_sync = self.sync_list[ + txn_sync = self.sync_list[ self.scenario_number % len(self.sync_list)] # # We don't want to run zero fill with only the same settings, such @@ -107,17 +106,9 @@ class test_txn02(wttest.WiredTigerTestCase, suite_subprocess): zerofill = 'false' if self.scenario_number % freq == 0: zerofill = 'true' - self.backup_dir = os.path.join(self.home, "WT_BACKUP") - conn_params = \ - 'log=(archive=false,enabled,file_max=%s),' % self.logmax + \ - 'log=(zero_fill=%s),' % zerofill + \ - 'create,error_prefix="%s: ",' % self.shortid() + \ - 'transaction_sync="%s",' % self.txn_sync - # print "Creating conn at '%s' with config '%s'" % (dir, conn_params) - conn = self.wiredtiger_open(dir, conn_params) - self.pr(`conn`) - self.session2 = conn.open_session() - return conn + return 'log=(archive=false,enabled,file_max=%s),' % self.logmax + \ + 'log=(zero_fill=%s),' % zerofill + \ + 'transaction_sync="%s",' % txn_sync # Check that a cursor (optionally started in a new transaction), sees the # expected values. @@ -206,6 +197,8 @@ class test_txn02(wttest.WiredTigerTestCase, suite_subprocess): self.assertEqual(cur_logs, pr_logs) def test_ops(self): + self.backup_dir = os.path.join(self.home, "WT_BACKUP") + self.session2 = self.conn.open_session() # print "Creating %s with config '%s'" % (self.uri, self.create_params) self.session.create(self.uri, self.create_params) # Set up the table with entries for 1, 2, 10 and 11. @@ -228,6 +221,7 @@ class test_txn02(wttest.WiredTigerTestCase, suite_subprocess): # Close and reopen the connection and cursor. 
if reopen == 'reopen': self.reopen_conn() + self.session2 = self.conn.open_session() c = self.session.open_cursor(self.uri, None, 'overwrite') self.session.begin_transaction( diff --git a/test/suite/test_txn04.py b/test/suite/test_txn04.py index ade39272f84..d8f6774ded1 100644 --- a/test/suite/test_txn04.py +++ b/test/suite/test_txn04.py @@ -63,24 +63,15 @@ class test_txn04(wttest.WiredTigerTestCase, suite_subprocess): txn1s = [('t1c', dict(txn1='commit')), ('t1r', dict(txn1='rollback'))] scenarios = make_scenarios(types, op1s, txn1s) - # Overrides WiredTigerTestCase - def setUpConnectionOpen(self, dir): - self.home = dir + + def conn_config(self): # Cycle through the different transaction_sync values in a # deterministic manner. - self.txn_sync = self.sync_list[ + txn_sync = self.sync_list[ self.scenario_number % len(self.sync_list)] - self.backup_dir = os.path.join(self.home, "WT_BACKUP") # Set archive false on the home directory. - conn_params = \ - 'log=(archive=false,enabled,file_max=%s),' % self.logmax + \ - 'create,error_prefix="%s: ",' % self.shortid() + \ - 'transaction_sync="%s",' % self.txn_sync - # print "Creating conn at '%s' with config '%s'" % (dir, conn_params) - conn = self.wiredtiger_open(dir, conn_params) - self.pr(`conn`) - self.session2 = conn.open_session() - return conn + return 'log=(archive=false,enabled,file_max=%s),' % self.logmax + \ + 'transaction_sync="%s",' % txn_sync # Check that a cursor (optionally started in a new transaction), sees the # expected values. @@ -146,6 +137,7 @@ class test_txn04(wttest.WiredTigerTestCase, suite_subprocess): # The runWt command closes our connection and sessions so # we need to reopen them here. self.hot_backup(None, committed) + self.session2 = self.conn.open_session() c = self.session.open_cursor(self.uri, None, 'overwrite') c.set_value(1) # Then do the given modification. 
@@ -193,6 +185,8 @@ class test_txn04(wttest.WiredTigerTestCase, suite_subprocess): self.hot_backup(self.uri, committed) def test_ops(self): + self.backup_dir = os.path.join(self.home, "WT_BACKUP") + self.session2 = self.conn.open_session() with self.expectedStdoutPattern('recreating metadata'): self.ops() diff --git a/test/suite/test_txn05.py b/test/suite/test_txn05.py index 5913c4688a3..7aaff221ba4 100644 --- a/test/suite/test_txn05.py +++ b/test/suite/test_txn05.py @@ -64,23 +64,15 @@ class test_txn05(wttest.WiredTigerTestCase, suite_subprocess): txn1s = [('t1c', dict(txn1='commit')), ('t1r', dict(txn1='rollback'))] scenarios = make_scenarios(types, op1s, txn1s) - # Overrides WiredTigerTestCase - def setUpConnectionOpen(self, dir): - self.home = dir + + def conn_config(self): # Cycle through the different transaction_sync values in a # deterministic manner. - self.txn_sync = self.sync_list[ + txn_sync = self.sync_list[ self.scenario_number % len(self.sync_list)] - self.backup_dir = os.path.join(self.home, "WT_BACKUP") - conn_params = \ - 'log=(archive=false,enabled,file_max=%s),' % self.logmax + \ - 'create,error_prefix="%s: ",' % self.shortid() + \ - 'transaction_sync="%s",' % self.txn_sync - # print "Creating conn at '%s' with config '%s'" % (dir, conn_params) - conn = self.wiredtiger_open(dir, conn_params) - self.pr(`conn`) - self.session2 = conn.open_session() - return conn + # Set archive false on the home directory. + return 'log=(archive=false,enabled,file_max=%s),' % self.logmax + \ + 'transaction_sync="%s",' % txn_sync # Check that a cursor (optionally started in a new transaction), sees the # expected values. 
@@ -167,6 +159,8 @@ class test_txn05(wttest.WiredTigerTestCase, suite_subprocess): self.runWt(['-h', self.backup_dir, 'printlog'], outfilename='printlog.out') def test_ops(self): + self.backup_dir = os.path.join(self.home, "WT_BACKUP") + self.session2 = self.conn.open_session() # print "Creating %s with config '%s'" % (self.uri, self.create_params) self.session.create(self.uri, self.create_params) # Set up the table with entries for 1-5. diff --git a/test/suite/test_txn06.py b/test/suite/test_txn06.py index 2bff97f6aac..c91dc6a623b 100644 --- a/test/suite/test_txn06.py +++ b/test/suite/test_txn06.py @@ -40,10 +40,10 @@ class test_txn06(wttest.WiredTigerTestCase, suite_subprocess): source_uri = 'table:' + tablename + "_src" nrows = 100000 - def setUpConnectionOpen(self, *args): + def conn_config(self): if not wiredtiger.verbose_build(): self.skipTest('requires a verbose build') - return super(test_txn06, self).setUpConnectionOpen(*args) + return '' def test_long_running(self): # Populate a table diff --git a/test/suite/test_txn07.py b/test/suite/test_txn07.py index a08d68f88aa..e2986fb999a 100644 --- a/test/suite/test_txn07.py +++ b/test/suite/test_txn07.py @@ -72,42 +72,18 @@ class test_txn07(wttest.WiredTigerTestCase, suite_subprocess): scenarios = make_scenarios(types, op1s, txn1s, compress, prune=30, prunelong=1000) - # Overrides WiredTigerTestCase - def setUpConnectionOpen(self, dir): - self.home = dir - # Cycle through the different transaction_sync values in a - # deterministic manner. 
- self.txn_sync = self.sync_list[ - self.scenario_number % len(self.sync_list)] - self.backup_dir = os.path.join(self.home, "WT_BACKUP") - conn_params = \ - 'log=(archive=false,enabled,file_max=%s,' % self.logmax + \ - 'compressor=%s)' % self.compress + \ - self.extensionArg(self.compress) + \ - ',create,error_prefix="%s: ",' % self.shortid() + \ - "statistics=(fast)," + \ - 'transaction_sync="%s",' % self.txn_sync - # print "Creating conn at '%s' with config '%s'" % (dir, conn_params) - try: - conn = self.wiredtiger_open(dir, conn_params) - except wiredtiger.WiredTigerError as e: - print "Failed conn at '%s' with config '%s'" % (dir, conn_params) - self.pr(`conn`) - self.session2 = conn.open_session() - return conn - - # Return the wiredtiger_open extension argument for a shared library. - def extensionArg(self, name): - if name == None or name == '': - return '' - - testdir = os.path.dirname(__file__) - extdir = os.path.join(run.wt_builddir, 'ext/compressors') - extfile = os.path.join( - extdir, name, '.libs', 'libwiredtiger_' + name + '.so') - if not os.path.exists(extfile): - self.skipTest('compression extension "' + extfile + '" not built') - return ',extensions=["' + extfile + '"]' + + def conn_config(self): + return 'log=(archive=false,enabled,file_max=%s,' % self.logmax + \ + 'compressor=%s)' % self.compress + \ + ',create,error_prefix="%s: ",' % self.shortid() + \ + "statistics=(fast)," + \ + 'transaction_sync="%s",' % \ + self.sync_list[self.scenario_number % len(self.sync_list)] + + def conn_extensions(self, extlist): + extlist.skip_if_missing = True + extlist.extension('compressors', self.compress) # Check that a cursor (optionally started in a new transaction), sees the # expected values. 
@@ -140,7 +116,7 @@ class test_txn07(wttest.WiredTigerTestCase, suite_subprocess): self.backup(self.backup_dir) backup_conn_params = 'log=(enabled,file_max=%s,' % self.logmax + \ 'compressor=%s)' % self.compress + \ - self.extensionArg(self.compress) + self.extensionsConfig() backup_conn = self.wiredtiger_open(self.backup_dir, backup_conn_params) try: self.check(backup_conn.open_session(), None, committed) @@ -148,6 +124,9 @@ class test_txn07(wttest.WiredTigerTestCase, suite_subprocess): backup_conn.close() def test_ops(self): + self.backup_dir = os.path.join(self.home, "WT_BACKUP") + self.session2 = self.conn.open_session() + # print "Creating %s with config '%s'" % (self.uri, self.create_params) self.session.create(self.uri, self.create_params) # Set up the table with entries for 1-5. diff --git a/test/suite/test_txn08.py b/test/suite/test_txn08.py index f0cdf08df07..04faed9d45a 100644 --- a/test/suite/test_txn08.py +++ b/test/suite/test_txn08.py @@ -41,7 +41,7 @@ class test_txn08(wttest.WiredTigerTestCase, suite_subprocess): uri = 'table:' + tablename # Turn on logging for this test. 
- def conn_config(self, dir): + def conn_config(self): return 'log=(archive=false,enabled,file_max=%s),' % self.logmax + \ 'transaction_sync="(method=dsync,enabled)"' diff --git a/test/suite/test_txn09.py b/test/suite/test_txn09.py index cfad8270ab1..768d714e248 100644 --- a/test/suite/test_txn09.py +++ b/test/suite/test_txn09.py @@ -80,19 +80,9 @@ class test_txn09(wttest.WiredTigerTestCase, suite_subprocess): op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s, prune=20, prunelong=5000) - # Overrides WiredTigerTestCase - def setUpConnectionOpen(self, dir): - self.home = dir - conn_params = \ - 'create,error_prefix="%s: ",' % self.shortid() + \ - 'log=(archive=false,enabled=%s),' % int(self.log_enabled) + \ - 'transaction_sync=(enabled=false),' - - # print "Opening conn at '%s' with config '%s'" % (dir, conn_params) - conn = self.wiredtiger_open(dir, conn_params) - self.pr(`conn`) - self.session2 = conn.open_session() - return conn + def conn_config(self): + return 'log=(archive=false,enabled=%s),' % int(self.log_enabled) + \ + 'transaction_sync=(enabled=false)' # Check that a cursor (optionally started in a new transaction), sees the # expected values. @@ -141,6 +131,7 @@ class test_txn09(wttest.WiredTigerTestCase, suite_subprocess): # Close and reopen the connection and cursor, toggling the log self.log_enabled = not self.log_enabled self.reopen_conn() + self.session2 = self.conn.open_session() c = self.session.open_cursor(self.uri, None, 'overwrite') self.session.begin_transaction( diff --git a/test/suite/test_txn11.py b/test/suite/test_txn11.py index 147bf3a76c0..3c02b1e86e3 100644 --- a/test/suite/test_txn11.py +++ b/test/suite/test_txn11.py @@ -44,7 +44,7 @@ class test_txn11(wttest.WiredTigerTestCase, suite_subprocess): uri = 'table:' + tablename # Turn on logging for this test. 
- def conn_config(self, dir): + def conn_config(self): return 'log=(archive=%s,' % self.archive + \ 'enabled,file_max=%s,prealloc=false),' % self.logmax + \ 'transaction_sync=(enabled=false),' diff --git a/test/suite/test_txn13.py b/test/suite/test_txn13.py index ae0250c06e8..2bf49486b3a 100644 --- a/test/suite/test_txn13.py +++ b/test/suite/test_txn13.py @@ -50,7 +50,7 @@ class test_txn13(wttest.WiredTigerTestCase, suite_subprocess): ]) # Turn on logging for this test. - def conn_config(self, dir): + def conn_config(self): return 'log=(archive=false,enabled,file_max=%s)' % self.logmax + \ ',cache_size=8G' diff --git a/test/suite/test_txn15.py b/test/suite/test_txn15.py index c061c093b02..a2bfb626338 100644 --- a/test/suite/test_txn15.py +++ b/test/suite/test_txn15.py @@ -41,7 +41,7 @@ class test_txn15(wttest.WiredTigerTestCase, suite_subprocess): create_params = 'key_format=i,value_format=i' entries = 100 # Turn on logging for this test. - def conn_config(self, dir): + def conn_config(self): return 'statistics=(fast),' + \ 'log=(archive=false,enabled,file_max=100K),' + \ 'use_environment=false,' + \ diff --git a/test/suite/wttest.py b/test/suite/wttest.py index bd6d2005cd9..0dce51f07d5 100644 --- a/test/suite/wttest.py +++ b/test/suite/wttest.py @@ -37,9 +37,8 @@ except ImportError: import unittest from contextlib import contextmanager -import os, re, shutil, sys, time, traceback -import wtscenario -import wiredtiger +import glob, os, re, shutil, sys, time, traceback +import wiredtiger, wtscenario def shortenWithEllipsis(s, maxlen): if len(s) > maxlen: @@ -152,6 +151,14 @@ class TestSuiteConnection(object): else: return getattr(self._conn, attr) +# Just like a list of strings, but with a convenience function +class ExtensionList(list): + skipIfMissing = False + def extension(self, dirname, name, extarg=None): + if name != None and name != 'none': + ext = '' if extarg == None else '=' + extarg + self.append(dirname + '/' + name + ext) + class 
WiredTigerTestCase(unittest.TestCase): _globalSetup = False _printOnceSeen = {} @@ -160,9 +167,16 @@ class WiredTigerTestCase(unittest.TestCase): # Can be a string or a callable function or lambda expression. conn_config = '' + # conn_extensions can be overridden to add a list of extensions to load. + # Each entry is a string (directory and extension name) and optional config. + # Example: + # conn_extensions = ('extractors/csv_extractor', + # 'test/fail_fs={allow_writes=100}') + conn_extensions = () + @staticmethod def globalSetup(preserveFiles = False, useTimestamp = False, - gdbSub = False, verbose = 1, dirarg = None, + gdbSub = False, verbose = 1, builddir = None, dirarg = None, longtest = False): WiredTigerTestCase._preserveFiles = preserveFiles d = 'WT_TEST' if dirarg == None else dirarg @@ -172,6 +186,7 @@ class WiredTigerTestCase(unittest.TestCase): os.makedirs(d) wtscenario.set_long_run(longtest) WiredTigerTestCase._parentTestdir = d + WiredTigerTestCase._builddir = builddir WiredTigerTestCase._origcwd = os.getcwd() WiredTigerTestCase._resultfile = open(os.path.join(d, 'results.txt'), "w", 0) # unbuffered WiredTigerTestCase._gdbSubprocess = gdbSub @@ -224,12 +239,66 @@ class WiredTigerTestCase(unittest.TestCase): return "%s.%s.%s" % (self.__module__, self.className(), self._testMethodName) - # Can be overridden, but first consider setting self.conn_config . + # Return the wiredtiger_open extension argument for + # any needed shared library. 
+ def extensionsConfig(self): + exts = self.conn_extensions + if hasattr(exts, '__call__'): + exts = ExtensionList() + self.conn_extensions(exts) + result = '' + extfiles = {} + skipIfMissing = False + if hasattr(exts, 'skip_if_missing'): + skipIfMissing = exts.skip_if_missing + for ext in exts: + extconf = '' + if '=' in ext: + splits = ext.split('=', 1) + ext = splits[0] + extconf = '=' + splits[1] + splits = ext.split('/') + if len(splits) != 2: + raise Exception(self.shortid() + + ": " + ext + + ": extension is not named /") + libname = splits[1] + dirname = splits[0] + pat = os.path.join(WiredTigerTestCase._builddir, 'ext', + dirname, libname, '.libs', 'libwiredtiger_*.so') + filenames = glob.glob(pat) + if len(filenames) == 0: + if skipIfMissing: + self.skipTest('extension "' + ext + '" not built') + continue + else: + raise Exception(self.shortid() + + ": " + ext + + ": no extensions library found matching: " + pat) + elif len(filenames) > 1: + raise Exception(self.shortid() + + ": " + ext + + ": multiple extensions libraries found matching: " + pat) + complete = '"' + filenames[0] + '"' + extconf + if ext in extfiles: + if extfiles[ext] != complete: + raise Exception(self.shortid() + + ": non-matching extension arguments in " + + str(exts)) + else: + extfiles[ext] = complete + if len(extfiles) != 0: + result = ',extensions=[' + ','.join(extfiles.values()) + ']' + return result + + # Can be overridden, but first consider setting self.conn_config + # or self.conn_extensions def setUpConnectionOpen(self, home): self.home = home config = self.conn_config if hasattr(config, '__call__'): - config = config(home) + config = self.conn_config() + config += self.extensionsConfig() # In case the open starts additional threads, flush first to # avoid confusion. 
sys.stdout.flush() -- cgit v1.2.1 From 4da006a05bf3e01ebbfcfd7d55ee67e84413f44a Mon Sep 17 00:00:00 2001 From: sueloverso Date: Mon, 16 Jan 2017 20:20:43 -0500 Subject: WT-3105 Avoid thread group deadlock on close in new dynamic eviction code. (#3242) --- src/evict/evict_lru.c | 29 +++++++++++++++++++++++------ src/support/thread_group.c | 8 ++++---- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 948c1e1139e..9b969de9a9e 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -912,6 +912,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) struct timespec current_time; WT_CACHE *cache; WT_CONNECTION_IMPL *conn; + WT_DECL_RET; uint64_t cur_threads, delta_msec, delta_pages, i, target_threads; uint64_t pgs_evicted_cur, pgs_evicted_persec_cur; uint32_t thread_surplus; @@ -945,7 +946,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) * Otherwise, we just record the number of evicted pages and return. */ if (conn->evict_tune_pgs_last == 0) - goto out; + goto err; delta_msec = WT_TIMEDIFF_MS(current_time, conn->evict_tune_last_time); delta_pages = pgs_evicted_cur - conn->evict_tune_pgs_last; @@ -995,8 +996,13 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_tune_workers_best; for (i = 0; i < thread_surplus; i++) { - WT_RET(__wt_thread_group_stop_one(session, - &conn->evict_threads, true)); + /* + * If we get an error, it should be because we + * were unable to acquire the thread group lock. + * Break out of trying. + */ + WT_ERR(__wt_thread_group_stop_one( + session, &conn->evict_threads, false)); WT_STAT_CONN_INCR(session, cache_eviction_worker_removed); } @@ -1029,7 +1035,12 @@ __evict_tune_workers(WT_SESSION_IMPL *session) * Start the new threads. */ for (i = 0; i < (target_threads - cur_threads); ++i) { - WT_RET(__wt_thread_group_start_one(session, + /* + * If we get an error, it should be because we were + * unable to acquire the thread group lock. Break out + * of trying. 
+ */ + WT_ERR(__wt_thread_group_start_one(session, &conn->evict_threads, false)); WT_STAT_CONN_INCR(session, cache_eviction_worker_created); @@ -1042,9 +1053,15 @@ __evict_tune_workers(WT_SESSION_IMPL *session) WT_STAT_CONN_SET(session, cache_eviction_active_workers, conn->evict_threads.current_threads); -out: conn->evict_tune_last_time = current_time; +err: conn->evict_tune_last_time = current_time; conn->evict_tune_pgs_last = pgs_evicted_cur; - return (0); + /* + * If we got an EBUSY trying to acquire the lock just return. + * We can try to tune the workers next time. + */ + if (ret == EBUSY) + ret = 0; + return (ret); } /* diff --git a/src/support/thread_group.c b/src/support/thread_group.c index d04f8977a9a..beb143e63e2 100644 --- a/src/support/thread_group.c +++ b/src/support/thread_group.c @@ -325,8 +325,8 @@ __wt_thread_group_start_one( if (wait) __wt_writelock(session, &group->lock); - else if (__wt_try_writelock(session, &group->lock) != 0) - return (0); + else + WT_RET(__wt_try_writelock(session, &group->lock)); /* Recheck the bounds now that we hold the lock */ if (group->current_threads < group->max) @@ -352,8 +352,8 @@ __wt_thread_group_stop_one( if (wait) __wt_writelock(session, &group->lock); - else if (__wt_try_writelock(session, &group->lock) != 0) - return (0); + else + WT_RET(__wt_try_writelock(session, &group->lock)); /* Recheck the bounds now that we hold the lock */ if (group->current_threads > group->min) -- cgit v1.2.1 From 04923774e5ede7a16c45ea31bd020e153a2a7666 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 16 Jan 2017 20:27:05 -0500 Subject: WT-3127 Fix a bug: CPU yield calls don't necessarily imply memory barriers (#3244) Add a full-barrier as part of the yield call. 
--- src/os_posix/os_yield.c | 8 ++++++++ src/os_win/os_yield.c | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/src/os_posix/os_yield.c b/src/os_posix/os_yield.c index 37d05bc1854..f7c43aae746 100644 --- a/src/os_posix/os_yield.c +++ b/src/os_posix/os_yield.c @@ -16,5 +16,13 @@ void __wt_yield(void) WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) { + /* + * Yielding the processor isn't documented as a memory barrier, and it's + * a reasonable expectation to have. There's no reason not to explicitly + * include a barrier since we're giving up the CPU, and ensures callers + * aren't ever surprised. + */ + WT_FULL_BARRIER(); + sched_yield(); } diff --git a/src/os_win/os_yield.c b/src/os_win/os_yield.c index aab1559e072..038f2efe162 100644 --- a/src/os_win/os_yield.c +++ b/src/os_win/os_yield.c @@ -15,5 +15,13 @@ void __wt_yield(void) { + /* + * Yielding the processor isn't documented as a memory barrier, and it's + * a reasonable expectation to have. There's no reason not to explicitly + * include a barrier since we're giving up the CPU, and ensures callers + * aren't ever surprised. + */ + WT_FULL_BARRIER(); + SwitchToThread(); } -- cgit v1.2.1 From f8c20c2b1c258126cc162721eccd51ea4282e1b7 Mon Sep 17 00:00:00 2001 From: David Hows Date: Wed, 18 Jan 2017 01:05:03 +1100 Subject: WT-3121 Make all ROTN encryption tests skipable (#3247) * WT-3121 test/suite: Make all tests skippable that use encryptors, collators, extractors in extensions. 
--- test/suite/test_collator.py | 5 ++++- test/suite/test_encrypt02.py | 16 +++++++++------- test/suite/test_encrypt03.py | 1 + test/suite/test_encrypt04.py | 1 + test/suite/test_encrypt06.py | 1 + test/suite/test_encrypt07.py | 2 ++ test/suite/test_join03.py | 4 +++- test/suite/test_join04.py | 4 +++- test/suite/test_join07.py | 4 +++- test/suite/test_schema05.py | 4 +++- 10 files changed, 30 insertions(+), 12 deletions(-) diff --git a/test/suite/test_collator.py b/test/suite/test_collator.py index e7be557335e..7ce135c8976 100644 --- a/test/suite/test_collator.py +++ b/test/suite/test_collator.py @@ -48,7 +48,10 @@ class test_collator(wttest.WiredTigerTestCase): nentries = 100 nindices = 4 - conn_extensions = [ 'extractors/csv', 'collators/revint' ] + def conn_extensions(self, extlist): + extlist.skip_if_missing = True + extlist.extension('extractors', 'csv') + extlist.extension('collators', 'revint') def create_indices(self): # Create self.nindices index files, each with a column from the CSV diff --git a/test/suite/test_encrypt02.py b/test/suite/test_encrypt02.py index 2d3b8a29b13..d950be067e2 100644 --- a/test/suite/test_encrypt02.py +++ b/test/suite/test_encrypt02.py @@ -39,17 +39,19 @@ from wtscenario import make_scenarios class test_encrypt02(wttest.WiredTigerTestCase, suite_subprocess): uri = 'file:test_encrypt02' encrypt_type = [ - ('noarg', dict( conn_extensions=[ 'encryptors/rotn' ], - encrypt_args='name=rotn', secret_arg=None)), - ('keyid', dict( conn_extensions=[ 'encryptors/rotn' ], - encrypt_args='name=rotn,keyid=11', secret_arg=None)), - ('pass', dict( conn_extensions=[ 'encryptors/rotn' ], - encrypt_args='name=rotn', secret_arg='ABC')), - ('keyid-pass', dict( conn_extensions=[ 'encryptors/rotn' ], + ('noarg', dict( encrypt_args='name=rotn', secret_arg=None)), + ('keyid', dict( encrypt_args='name=rotn,keyid=11', secret_arg=None)), + ('pass', dict( encrypt_args='name=rotn', secret_arg='ABC')), + ('keyid-pass', dict( encrypt_args='name=rotn,keyid=11', 
secret_arg='ABC')), ] scenarios = make_scenarios(encrypt_type) + def conn_extensions(self, extlist): + # Load the compression extension, skip the test if missing + extlist.skip_if_missing = True + extlist.extension('encryptors', 'rotn') + nrecords = 5000 bigvalue = "abcdefghij" * 1001 # len(bigvalue) = 10010 diff --git a/test/suite/test_encrypt03.py b/test/suite/test_encrypt03.py index 0809c16c6d1..302572bd044 100644 --- a/test/suite/test_encrypt03.py +++ b/test/suite/test_encrypt03.py @@ -51,6 +51,7 @@ class test_encrypt03(wttest.WiredTigerTestCase): scenarios = make_scenarios(types, encrypt) def conn_extensions(self, extlist): + extlist.skip_if_missing = True extlist.extension('encryptors', self.sys_encrypt) extlist.extension('encryptors', self.file_encrypt) diff --git a/test/suite/test_encrypt04.py b/test/suite/test_encrypt04.py index a10e6c28831..17777fc9564 100644 --- a/test/suite/test_encrypt04.py +++ b/test/suite/test_encrypt04.py @@ -81,6 +81,7 @@ class test_encrypt04(wttest.WiredTigerTestCase, suite_subprocess): extarg = None if self.expect_forceerror: extarg='(config=\"rotn_force_error=true\")' + extlist.skip_if_missing = True extlist.extension('encryptors', self.name, extarg) # Override WiredTigerTestCase, we have extensions. 
diff --git a/test/suite/test_encrypt06.py b/test/suite/test_encrypt06.py index 3dd7ac17eff..72718e53b2b 100644 --- a/test/suite/test_encrypt06.py +++ b/test/suite/test_encrypt06.py @@ -90,6 +90,7 @@ class test_encrypt06(wttest.WiredTigerTestCase): nrecords = 1000 def conn_extensions(self, extlist): + extlist.skip_if_missing = True extlist.extension('encryptors', self.sys_encrypt) extlist.extension('encryptors', self.file0_encrypt) extlist.extension('encryptors', self.file1_encrypt) diff --git a/test/suite/test_encrypt07.py b/test/suite/test_encrypt07.py index 1c342783353..81c9f1a49ea 100644 --- a/test/suite/test_encrypt07.py +++ b/test/suite/test_encrypt07.py @@ -45,6 +45,8 @@ class test_encrypt07(test_salvage.test_salvage): bigvalue = "abcdefghij" * 1007 # len(bigvalue) = 10070 def conn_extensions(self, extlist): + # Load the compression extension, skip the test if missing + extlist.skip_if_missing = True extlist.extension('encryptors', self.sys_encrypt) def conn_config(self): diff --git a/test/suite/test_join03.py b/test/suite/test_join03.py index fe47b75f99b..dd8111f6ead 100644 --- a/test/suite/test_join03.py +++ b/test/suite/test_join03.py @@ -36,7 +36,9 @@ class test_join03(wttest.WiredTigerTestCase): table_name1 = 'test_join03' nentries = 100 - conn_extensions = [ 'extractors/csv' ] + def conn_extensions(self, extlist): + extlist.skip_if_missing = True + extlist.extension('extractors', 'csv') def gen_key(self, i): return [ i + 1 ] diff --git a/test/suite/test_join04.py b/test/suite/test_join04.py index 4190f299676..e65b8b53333 100644 --- a/test/suite/test_join04.py +++ b/test/suite/test_join04.py @@ -36,7 +36,9 @@ class test_join04(wttest.WiredTigerTestCase): table_name1 = 'test_join04' nentries = 100 - conn_extensions = [ 'extractors/csv' ] + def conn_extensions(self, extlist): + extlist.skip_if_missing = True + extlist.extension('extractors', 'csv') # JIRA WT-2308: # Test extractors with equality joins diff --git a/test/suite/test_join07.py 
b/test/suite/test_join07.py index 6a31970250f..8fae3539246 100644 --- a/test/suite/test_join07.py +++ b/test/suite/test_join07.py @@ -200,7 +200,9 @@ class test_join07(wttest.WiredTigerTestCase): scenarios = make_scenarios(extractscen) - conn_extensions = [ 'extractors/csv' ] + def conn_extensions(self, extlist): + extlist.skip_if_missing = True + extlist.extension('extractors', 'csv') def expect(self, token, expected): if token == None or token.kind not in expected: diff --git a/test/suite/test_schema05.py b/test/suite/test_schema05.py index bb3d4f49006..d536a629373 100644 --- a/test/suite/test_schema05.py +++ b/test/suite/test_schema05.py @@ -57,7 +57,9 @@ class test_schema05(wttest.WiredTigerTestCase): ('index-after', { 'create_index' : 2 }), ]) - conn_extensions = [ 'extractors/csv' ] + def conn_extensions(self, extlist): + extlist.skip_if_missing = True + extlist.extension('extractors', 'csv') def create_indices(self): # Create self.nindices index files, each with a column from the CSV -- cgit v1.2.1 From 2d2bb414675e449f46d6412db93bb7b32057af0a Mon Sep 17 00:00:00 2001 From: sueloverso Date: Tue, 17 Jan 2017 15:41:40 -0500 Subject: WT-3118 Protect test against unexpectedly slow child start. (#3248) --- test/recovery/random-abort.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c index c407361c7eb..a6e4d9801e5 100644 --- a/test/recovery/random-abort.c +++ b/test/recovery/random-abort.c @@ -33,7 +33,11 @@ static char home[512]; /* Program working dir */ static const char *progname; /* Program name */ +/* + * These two names for the URI and file system must be maintained in tandem. 
+ */ static const char * const uri = "table:main"; +static const char * const fs_main = "main.wt"; static bool inmem; #define MAX_TH 12 @@ -211,6 +215,7 @@ extern char *__wt_optarg; int main(int argc, char *argv[]) { + struct stat sb; FILE *fp; WT_CONNECTION *conn; WT_CURSOR *cursor; @@ -305,8 +310,15 @@ main(int argc, char *argv[]) /* parent */ /* * Sleep for the configured amount of time before killing - * the child. + * the child. Start the timeout from the time we notice that + * the table has been created. That allows the test to run + * correctly on really slow machines. Verify the process ID + * still exists in case the child aborts for some reason we + * don't stay in this loop forever. */ + snprintf(fname, sizeof(fname), "%s/%s", home, fs_main); + while (stat(fname, &sb) != 0 && kill(pid, 0) == 0) + sleep(1); sleep(timeout); /* -- cgit v1.2.1 From 25a7c8aae547b7a0c50081656935c663c640a9f0 Mon Sep 17 00:00:00 2001 From: Sulabh Mahajan Date: Wed, 18 Jan 2017 13:18:41 +1100 Subject: WT-3083 Fix a bug in wtperf config dump (#3224) Also add a test case to ensure the functionality doesn't break in the future. --- bench/wtperf/config.c | 87 +++++++++++-- bench/wtperf/wtperf.c | 122 ++++++++++++----- test/wtperf/test_conf_dump.py | 296 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 461 insertions(+), 44 deletions(-) create mode 100644 test/wtperf/test_conf_dump.py diff --git a/bench/wtperf/config.c b/bench/wtperf/config.c index 5b14a4cdf68..a15a3485dde 100644 --- a/bench/wtperf/config.c +++ b/bench/wtperf/config.c @@ -622,17 +622,9 @@ config_opt_str(WTPERF *wtperf, const char *optstr) return (ret); } - /* - * Append the current line to our copy of the config. The config is - * stored in the order it is processed, so added options will be after - * any parsed from the original config. We allocate len + 1 to allow for - * a null byte to be added. 
- */ - config_line = dcalloc(sizeof(CONFIG_QUEUE_ENTRY), 1); - config_line->string = dstrdup(optstr); - TAILQ_INSERT_TAIL(&opts->config_head, config_line, q); - while (ret == 0) { + size_t pos; + if ((ret = scan->next(scan, &k, &v)) != 0) { /* Any parse error has already been reported. */ if (ret == WT_NOTFOUND) @@ -640,6 +632,46 @@ config_opt_str(WTPERF *wtperf, const char *optstr) break; } ret = config_opt(wtperf, &k, &v); + + /* + * Append the key-value pair to our copy of the config. + * The config is stored in the order it is processed, so added + * options will be after any parsed from the original config. + */ + config_line = dcalloc(sizeof(CONFIG_QUEUE_ENTRY), 1); + /* + * If key or value is a string, consider extra space for the + * quotes. Add 2 to the required space for '=' and the ending + * null character in "key=value". + */ + config_line->string = dcalloc( + k.len + (k.type == WT_CONFIG_ITEM_STRING ? 2 : 0) + + v.len + (v.type == WT_CONFIG_ITEM_STRING ? 2 : 0) + 2, 1); + pos = 0; + if (k.type == WT_CONFIG_ITEM_STRING) { + config_line->string[pos] = '"'; + pos++; + } + strncpy(config_line->string + pos, k.str, k.len); + pos += k.len; + if (k.type == WT_CONFIG_ITEM_STRING) { + config_line->string[pos] = '"'; + pos++; + } + config_line->string[pos] = '='; + pos++; + if (v.type == WT_CONFIG_ITEM_STRING) { + config_line->string[pos] = '"'; + pos++; + } + strncpy(config_line->string + pos, v.str, v.len); + pos += v.len; + if (v.type == WT_CONFIG_ITEM_STRING) { + config_line->string[pos] = '"'; + pos++; + } + config_line->string[pos] = '\0'; + TAILQ_INSERT_TAIL(&opts->config_head, config_line, q); } if ((t_ret = scan->close(scan)) != 0) { lprintf(wtperf, ret, 0, "Error in config_scan_end"); @@ -754,8 +786,11 @@ config_consolidate(CONFIG_OPTS *opts) /* * This loop iterates over the config queue and for each entry checks if - * a later queue entry has the same key. If there's a match, the current - * queue entry is removed and we continue. 
+ * a later queue entry has the same key. If there's a match, and key is + * "conn_config" or "table_config", the later queue entry is replaced + * with a concatenated entry of the two queue entries, the current queue + * entry is removed. For any other key, if there is a match, the current + * queue entry is removed. */ conf_line = TAILQ_FIRST(&opts->config_head); while (conf_line != NULL) { @@ -771,6 +806,34 @@ config_consolidate(CONFIG_OPTS *opts) if (strncmp(conf_line->string, test_line->string, (size_t)((string_key - conf_line->string) + 1)) == 0) { + if ((strncmp("conn_config=", conf_line->string, + (size_t)((string_key - conf_line->string) + + 1)) == 0) || + (strncmp("table_config=", conf_line->string, + (size_t)((string_key - conf_line->string) + + 1)) == 0)) { + char *concat_str, *val_pointer; + + /* + * To concatenate the two config + * strings, copy the first string to a + * new one, replace the ending '"' with + * a ',' and then concatenate the second + * string's value after its starting '"' + */ + val_pointer = + strchr(test_line->string, '=') + 2; + concat_str = + dmalloc(strlen(conf_line->string) + + strlen(val_pointer) + 1); + strcpy(concat_str, conf_line->string); + concat_str[strlen(concat_str) - 1] = + ','; + strcat(concat_str, val_pointer); + free(test_line->string); + test_line->string = concat_str; + } + TAILQ_REMOVE(&opts->config_head, conf_line, q); free(conf_line->string); free(conf_line); diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 8c7f0053388..2f747fa3fc7 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -2502,52 +2502,110 @@ main(int argc, char *argv[]) __wt_stream_set_line_buffer(stdout); /* Concatenate non-default configuration strings. 
*/ - if (opts->verbose > 1 || user_cconfig != NULL || - opts->session_count_idle > 0 || wtperf->compress_ext != NULL || - wtperf->async_config != NULL) { - req_len = strlen(debug_cconfig) + 20; - if (user_cconfig != NULL) - req_len += strlen(user_cconfig); - if (wtperf->async_config != NULL) - req_len += strlen(wtperf->async_config); - if (wtperf->compress_ext != NULL) - req_len += strlen(wtperf->compress_ext); + if ((opts->verbose > 1 && strlen(debug_cconfig)) || + user_cconfig != NULL || opts->session_count_idle > 0 || + wtperf->compress_ext != NULL || wtperf->async_config != NULL) { + bool append_comma; + uint32_t pos; + + append_comma = false; + pos = 0; + req_len = 20; + req_len += (wtperf->async_config != NULL ? + strlen(wtperf->async_config) : 0); + req_len += (wtperf->compress_ext != NULL ? + strlen(wtperf->compress_ext) : 0); if (opts->session_count_idle > 0) { - sreq_len = strlen(",session_max=") + 6; + sreq_len = strlen("session_max=") + 6; req_len += sreq_len; sess_cfg = dmalloc(sreq_len); snprintf(sess_cfg, sreq_len, - ",session_max=%" PRIu32, + "session_max=%" PRIu32, opts->session_count_idle + wtperf->workers_cnt + opts->populate_threads + 10); } + req_len += (user_cconfig != NULL ? strlen(user_cconfig) : 0); + req_len += (debug_cconfig != NULL ? strlen(debug_cconfig) : 0); cc_buf = dmalloc(req_len); - snprintf(cc_buf, req_len, "%s,%s,%s,%s,%s", - wtperf->async_config ? wtperf->async_config : "", - wtperf->compress_ext ? wtperf->compress_ext : "", - opts->verbose > 1 ? debug_cconfig : "", - sess_cfg != NULL ? sess_cfg : "", - user_cconfig != NULL ? user_cconfig : ""); + + if (wtperf->async_config != NULL && + strlen(wtperf->async_config)) { + pos += (uint32_t)snprintf( + cc_buf + pos, req_len - pos, "%s%s", + append_comma ? "," : "", wtperf->async_config); + append_comma = true; + } + if (wtperf->compress_ext != NULL && + strlen(wtperf->compress_ext)) { + pos += (uint32_t)snprintf( + cc_buf + pos, req_len - pos, "%s%s", + append_comma ? 
"," : "", wtperf->compress_ext); + append_comma = true; + } + if (sess_cfg != NULL && strlen(sess_cfg)) { + pos += (uint32_t)snprintf( + cc_buf + pos, req_len - pos, "%s%s", + append_comma ? "," : "", sess_cfg); + append_comma = true; + } + if (user_cconfig != NULL && strlen(user_cconfig)) { + pos += (uint32_t)snprintf( + cc_buf + pos, req_len - pos, "%s%s", + append_comma ? "," : "", user_cconfig); + append_comma = true; + } + if (opts->verbose > 1 && strlen(debug_cconfig)) { + pos += (uint32_t)snprintf( + cc_buf + pos, req_len - pos, "%s%s", + append_comma ? "," : "", debug_cconfig); + append_comma = true; + } + if (strlen(cc_buf) && (ret = config_opt_name_value(wtperf, "conn_config", cc_buf)) != 0) goto err; } - if (opts->verbose > 1 || opts->index || + if ((opts->verbose > 1 && strlen(debug_tconfig)) || opts->index || user_tconfig != NULL || wtperf->compress_table != NULL) { - req_len = strlen(debug_tconfig) + 20; - if (user_tconfig != NULL) - req_len += strlen(user_tconfig); - if (wtperf->compress_table != NULL) - req_len += strlen(wtperf->compress_table); - if (opts->index) - req_len += strlen(INDEX_COL_NAMES); + bool append_comma; + uint32_t pos; + + append_comma = false; + pos = 0; + req_len = 20; + req_len += (wtperf->compress_table != NULL ? + strlen(wtperf->compress_table) : 0); + req_len += (opts->index ? strlen(INDEX_COL_NAMES) : 0); + req_len += (user_tconfig != NULL ? strlen(user_tconfig) : 0); + req_len += (debug_tconfig != NULL ? strlen(debug_tconfig) : 0); tc_buf = dmalloc(req_len); - snprintf(tc_buf, req_len, "%s,%s,%s,%s", - opts->index ? INDEX_COL_NAMES : "", - wtperf->compress_table != NULL ? - wtperf->compress_table : "", - opts->verbose > 1 ? debug_tconfig : "", - user_tconfig ? user_tconfig : ""); + + if (wtperf->compress_table != NULL && + strlen(wtperf->compress_table)) { + pos += (uint32_t)snprintf( + tc_buf + pos, req_len - pos, "%s%s", + append_comma ? 
"," : "", wtperf->compress_table); + append_comma = true; + } + if (opts->index) { + pos += (uint32_t)snprintf( + tc_buf + pos, req_len - pos, "%s%s", + append_comma ? "," : "", INDEX_COL_NAMES); + append_comma = true; + } + if (user_tconfig != NULL && strlen(user_tconfig)) { + pos += (uint32_t)snprintf( + tc_buf + pos, req_len - pos, "%s%s", + append_comma ? "," : "", user_tconfig); + append_comma = true; + } + if (opts->verbose > 1 && strlen(debug_tconfig)) { + pos += (uint32_t)snprintf( + tc_buf + pos, req_len - pos, "%s%s", + append_comma ? "," : "", debug_tconfig); + append_comma = true; + } + if (strlen(tc_buf) && (ret = config_opt_name_value(wtperf, "table_config", tc_buf)) != 0) goto err; diff --git a/test/wtperf/test_conf_dump.py b/test/wtperf/test_conf_dump.py new file mode 100644 index 00000000000..ef7f276a1d0 --- /dev/null +++ b/test/wtperf/test_conf_dump.py @@ -0,0 +1,296 @@ +# Usage: python test_conf_dump.py +# +# This script tests if the config file dumped in the test directory corresponds +# correctly to the wtperf config file used. Command line options to wtperf are +# also taken into account. +# +# Following expectations are checked for: +# 1. If provided through multiple sources, "conn_config" and "table_config" +# configuration options are appended to each other. All other options get +# replaced by a higher precedent source. +# 2. The precedence order for the options in an increasing order is as follows: +# default option, +# provided through config file, +# provided through option -o +# provided through option -C (for conn_config) or -T (for table_config) +# +# Test fails if any config option is missing or has a wrong value. Test also +# fails if the value for the option is not replaced/appended in the correct +# order of precedence as stated above. 
+ +import os, re, subprocess, sys + +OP_FILE = "WT_TEST/CONFIG.wtperf" +TMP_CONF = "__tmp.wtperf" +WTPERF_BIN = "./wtperf" +WTPERF_DIR = "../../build_posix/bench/wtperf/" + +CONF_NOT_PROVIDED = -2 + +# Generate a wtperf conf file to use +def generate_conf_file(file_name): + f = open(file_name, 'w') + f.write( +'''conn_config="cache_size=16GB,eviction=(threads_max=4),log=(enabled=false),session_max=33" +table_config="leaf_page_max=32k,internal_page_max=16k,allocation_size=4k,split_pct=90,type=file" +close_conn=false +icount=1500 +create=true +compression="snappy" +checkpoint_interval=5 +checkpoint_threads=1 +populate_threads=1 +report_interval=5 +session_count_idle=50 +session_count_idle=60 +session_count_idle=70 +session_count_idle=80 +run_time=5 +sample_interval=5 +sample_rate=1 +table_count=2 +threads=((count=6,updates=1)) +value_sz=1000 +warmup=2 +''') + f.close() + +# Build a command from the given options and execute wtperf +def execute_wtperf(conf_file, option_C = "", option_T = "", option_o = ""): + # Generate the command to run, execute wtperf + cmd = WTPERF_BIN + " -O " + conf_file + if option_C: + cmd += " -C " + option_C + if option_T: + cmd += " -T " + option_T + if option_o: + # Any quotes in option_o need to be escaped before providing it as part + # of the command + option_o_cmd_str = option_o.replace('"', '\\"') + cmd += " -o " + option_o_cmd_str + + print "Running: ", cmd + subprocess.check_call(cmd, shell=True) + print "=========================\n" + +# Build a dictionary of config key and it's value from the given config file. 
+# Optionally take -C, -T and -o and overwrite/append values as per correct +# precedence +def build_dict_from_conf( + conf_file, option_C = "", option_T = "", option_o = ""): + # Open given conf file and make a dictionary of passed arguments and values + with open(conf_file) as f: + lines = f.read().splitlines() + + # Maintain precedence order of config file, -o, -C/-T + # Build a dict of config options, appending values for table_config and + # conn_config, if specified multiple times. Replace with the latest in + # case of all other configuration keys. + key_val_dict = {} + for line in lines: + if re.match('^\s*#', line) is None: + key_val_pair = line.split('=', 1) + if ((key_val_pair[0] == 'table_config' or + key_val_pair[0] == 'conn_config') and + key_val_pair[0] in key_val_dict): + tmp_val = key_val_dict[key_val_pair[0]][:-1] + tmp_val += "," + tmp_val += key_val_pair[1][1:] + key_val_dict[key_val_pair[0]] = tmp_val + else: + key_val_dict[key_val_pair[0]] = key_val_pair[1] + + # If provided, put option o in the dict + if option_o: + opt_o_key_val_list = option_o.split(',') + for op_o_key_val in opt_o_key_val_list: + key_val_pair = op_o_key_val.split('=', 1) + if ((key_val_pair[0] == 'table_config' or + key_val_pair[0] == 'conn_config') and + key_val_pair[0] in key_val_dict): + tmp_val = key_val_dict[key_val_pair[0]][:-1] + tmp_val += "," + tmp_val += key_val_pair[1][1:] + key_val_dict[key_val_pair[0]] = tmp_val + else: + key_val_dict[key_val_pair[0]] = key_val_pair[1] + + # If provided, put option C in the dict + if option_C: + tmp_val = key_val_dict["conn_config"][:-1] + tmp_val += "," + tmp_val += option_C[1:] + key_val_dict["conn_config"] = tmp_val + + # If provided, put option T in the dict + if option_T: + tmp_val = key_val_dict["table_config"][:-1] + tmp_val += "," + tmp_val += option_T[1:] + key_val_dict["table_config"] = tmp_val + + return key_val_dict + +# Extract configuration value for the given key from the given config file +def 
extract_config_from_file(conf_file, key): + ret_val = "" + with open(conf_file) as f: + lines = f.read().splitlines() + for line in lines: + if re.match('^\s*#', line) is None: + key_val_pair = line.split('=', 1) + if key_val_pair[0] == key: + ret_val = key_val_pair[1] + return ret_val + +# Extract configuration value for the given key from the given "-o" string +def extract_config_from_opt_o(option_o, key): + ret_val = "" + opt_o_key_val_list = option_o.split(',') + for op_o_key_val in opt_o_key_val_list: + key_val_pair = op_o_key_val.split('=', 1) + if key_val_pair[0] == key: + ret_val = key_val_pair[1] + return ret_val + +# Execute test: +# Run wtperf with given config and check if the dumped config file matches the +# given inputs +def run_test(conf_file, option_C = "", option_T = "", option_o = ""): + # Run wtperf + execute_wtperf(conf_file, option_C, option_T, option_o) + + key_val_dict_ip = build_dict_from_conf( + conf_file, option_C, option_T, option_o) + key_val_dict_op = build_dict_from_conf(OP_FILE) + + conn_config_from_file = extract_config_from_file(conf_file, "conn_config") + table_config_from_file = extract_config_from_file(conf_file, "table_config") + conn_config_from_opt_o = "" + table_config_from_opt_o = "" + if option_o: + conn_config_from_opt_o = extract_config_from_opt_o( + option_o, "conn_config") + table_config_from_opt_o = extract_config_from_opt_o( + option_o, "table_config") + + # Check if dumped output conf matches with input file and options + match = True + for key in key_val_dict_ip: + match_itr = True + + # Check if we see this config key in the dumped file + if not key in key_val_dict_op: + print "Key '", key, "' not found in dumped file ", OP_FILE + match = match_itr = False + continue + + # Check if values from all sources of conn_config are presented in the + # conn_config in dumped file. Also check of their relative ordering as + # per precedence rules defined. 
+ if (key == 'conn_config' and + (conn_config_from_file or conn_config_from_opt_o or option_C)): + # Should find these config in order: file < option o < option C + file_loc = CONF_NOT_PROVIDED + option_o_loc = CONF_NOT_PROVIDED + option_C_loc = CONF_NOT_PROVIDED + op_conn_config = key_val_dict_op['conn_config'] + + if conn_config_from_file: + file_loc = op_conn_config.find(conn_config_from_file[1:-1]) + if conn_config_from_opt_o: + option_o_loc = op_conn_config.find(conn_config_from_opt_o[1:-1]) + if option_C: + option_C_loc = op_conn_config.find(option_C[1:-1]) + + # Check if value from any of the sources is missing + if ((conn_config_from_file and file_loc == -1) or + (conn_config_from_opt_o and option_o_loc == -1) or + (option_C and option_C_loc == -1)): + print "Part of conn_config missing in dumped file ", OP_FILE + match_itr = False + + # Check if the values got appended in the correct order + if match_itr: + if ((option_o_loc != CONF_NOT_PROVIDED and + option_o_loc < file_loc) or + (option_C_loc != CONF_NOT_PROVIDED and + (option_C_loc < file_loc or option_C_loc < option_o_loc))): + print "Detected incorrect config append order:" + match_itr = False + + # Check if values from all sources of table_config are presented in the + # table_config in dumped file. Also check of their relative ordering as + # per precedence rules defined. 
+ if (key == 'table_config' and + (table_config_from_file or table_config_from_opt_o or option_T)): + # Should find these config in order: file < option o < option T + file_loc = CONF_NOT_PROVIDED + option_o_loc = CONF_NOT_PROVIDED + option_T_loc = CONF_NOT_PROVIDED + op_table_config = key_val_dict_op['table_config'] + + if table_config_from_file: + file_loc = op_table_config.find(table_config_from_file[1:-1]) + if table_config_from_opt_o: + option_o_loc = op_table_config.find( + table_config_from_opt_o[1:-1]) + if option_T: + option_T_loc = op_table_config.find(option_T[1:-1]) + + # Check if value from any of the sources is missing + if ((table_config_from_file and file_loc == -1) or + (table_config_from_opt_o and option_o_loc == -1) or + (option_T and option_T_loc == -1)): + print "Part of table_config missing in dumped file ", OP_FILE + match_itr = False + + # Check if the values got appended in the correct order + if match_itr: + if ((option_o_loc != CONF_NOT_PROVIDED and + option_o_loc < file_loc) or + (option_T_loc != CONF_NOT_PROVIDED and + (option_T_loc < file_loc or option_T_loc < option_o_loc))): + print "Detected incorrect config append order:" + match_itr = False + + if (key != 'table_config' and key != 'conn_config' and + key_val_dict_ip[key] != key_val_dict_op[key]): + print "Config mismatch between:" + match_itr = False + + if match_itr is False: + print "Input Config:", key, '=', key_val_dict_ip[key] + print "Dumped Config:", key, '=', key_val_dict_op[key] + print "\n" + + match = match and match_itr + + return match + +# ----------------- Execute Test -------------- +# If a wtperf conf file is provided use it, else generate a temp conf file +os.chdir(WTPERF_DIR) +if len(sys.argv) == 2: + conf_file = sys.argv[1] +else: + conf_file = TMP_CONF + generate_conf_file(conf_file) + +# Run a test with no options +if not run_test(conf_file): + exit(-1) + +# Run a test with -C, -T, -o provided +option_o = 
"verbose=2,conn_config=\"session_max=135\",table_config=\"type=lsm\",sample_interval=2,run_time=0,sample_rate=2,readonly=false" +option_C = "\"cache_size=10GB,session_max=115\"" +option_T = "\"allocation_size=8k,split_pct=92\"" +if not run_test(conf_file, option_C, option_T, option_o): + exit(-1) + +# Cleanup generated temp files +subprocess.check_call("rm -rf WT_TEST/", shell=True) +if len(sys.argv) == 1 and conf_file == TMP_CONF: + subprocess.check_call("rm " + TMP_CONF, shell=True) + +print "All tests succeeded" -- cgit v1.2.1 From 91dd1fa489cab34a40e3f0115fe6771326e9c410 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 18 Jan 2017 21:05:06 -0500 Subject: WT-3134 Coverity scan reports 1368529 and 1368528 (#3251) * Coverity complains: CID 1368529: Security best practices violations (TOCTOU) Calling function "fopen" that uses "fname" after a check function. This can cause a time-of-check, time-of-use race condition. We're doing: snprintf(buffer); stat(buffer); snprintf(buffer); fopen(buffer); and I think Coverity is ignoring the second snprintf(), and is complaining about a stat followed by an fopen some number of lines of code later. It's simple enough to give the two calls their own buffers, hopefully that will keep Coverity quiet. Use 1024 as the size of a path instead of 512, (that's the traditional MAXPATHLEN value). Use sizeof(home) in calls to testutil_work_dir_from_path() so we don't accidentally diverge from the declared size. Clean up an error call, there's no need for two error messages. * Coverity complains: CID 1368528: (DEADCODE) Execution cannot reach the expression "","" inside this statement: "pos += (uint32_t)snprintf(c...". Replace boolean variable with a "const char *" that's set to either an empty string or a comma, removing the need for the test. Use size_t as the size of an object in memory, not a uint32_t. Don't declare variables in block scope. 
Assignment operators are the lowest priority operator (well, except for comma), don't bother declaring the order of evaluation for an assignment operator. strlen() returns a size_t length, don't evaluate it as a boolean. --- bench/wtperf/wtperf.c | 118 ++++++++++++++++++++---------------------- test/recovery/random-abort.c | 17 +++--- test/recovery/truncated-log.c | 4 +- 3 files changed, 65 insertions(+), 74 deletions(-) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 2f747fa3fc7..91cedee8328 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -2361,11 +2361,11 @@ main(int argc, char *argv[]) { CONFIG_OPTS *opts; WTPERF *wtperf, _wtperf; - size_t req_len, sreq_len; + size_t pos, req_len, sreq_len; bool monitor_set; int ch, ret; const char *cmdflags = "C:h:m:O:o:T:"; - const char *config_opts; + const char *append_comma, *config_opts; char *cc_buf, *path, *sess_cfg, *tc_buf, *user_cconfig, *user_tconfig; /* The first WTPERF structure (from which all others are derived). */ @@ -2502,19 +2502,14 @@ main(int argc, char *argv[]) __wt_stream_set_line_buffer(stdout); /* Concatenate non-default configuration strings. */ - if ((opts->verbose > 1 && strlen(debug_cconfig)) || + if ((opts->verbose > 1 && strlen(debug_cconfig) != 0) || user_cconfig != NULL || opts->session_count_idle > 0 || wtperf->compress_ext != NULL || wtperf->async_config != NULL) { - bool append_comma; - uint32_t pos; - - append_comma = false; - pos = 0; req_len = 20; - req_len += (wtperf->async_config != NULL ? - strlen(wtperf->async_config) : 0); - req_len += (wtperf->compress_ext != NULL ? - strlen(wtperf->compress_ext) : 0); + req_len += wtperf->async_config != NULL ? + strlen(wtperf->async_config) : 0; + req_len += wtperf->compress_ext != NULL ?
+ strlen(wtperf->compress_ext) : 0; if (opts->session_count_idle > 0) { sreq_len = strlen("session_max=") + 6; req_len += sreq_len; @@ -2524,89 +2519,88 @@ main(int argc, char *argv[]) opts->session_count_idle + wtperf->workers_cnt + opts->populate_threads + 10); } - req_len += (user_cconfig != NULL ? strlen(user_cconfig) : 0); - req_len += (debug_cconfig != NULL ? strlen(debug_cconfig) : 0); + req_len += user_cconfig != NULL ? strlen(user_cconfig) : 0; + req_len += debug_cconfig != NULL ? strlen(debug_cconfig) : 0; cc_buf = dmalloc(req_len); + pos = 0; + append_comma = ""; if (wtperf->async_config != NULL && - strlen(wtperf->async_config)) { - pos += (uint32_t)snprintf( + strlen(wtperf->async_config) != 0) { + pos += (size_t)snprintf( cc_buf + pos, req_len - pos, "%s%s", - append_comma ? "," : "", wtperf->async_config); - append_comma = true; + append_comma, wtperf->async_config); + append_comma = ","; } if (wtperf->compress_ext != NULL && - strlen(wtperf->compress_ext)) { - pos += (uint32_t)snprintf( + strlen(wtperf->compress_ext) != 0) { + pos += (size_t)snprintf( cc_buf + pos, req_len - pos, "%s%s", - append_comma ? "," : "", wtperf->compress_ext); - append_comma = true; + append_comma, wtperf->compress_ext); + append_comma = ","; } - if (sess_cfg != NULL && strlen(sess_cfg)) { - pos += (uint32_t)snprintf( + if (sess_cfg != NULL && strlen(sess_cfg) != 0) { + pos += (size_t)snprintf( cc_buf + pos, req_len - pos, "%s%s", - append_comma ? "," : "", sess_cfg); - append_comma = true; + append_comma, sess_cfg); + append_comma = ","; } - if (user_cconfig != NULL && strlen(user_cconfig)) { - pos += (uint32_t)snprintf( + if (user_cconfig != NULL && strlen(user_cconfig) != 0) { + pos += (size_t)snprintf( cc_buf + pos, req_len - pos, "%s%s", - append_comma ? 
"," : "", user_cconfig); - append_comma = true; + append_comma, user_cconfig); + append_comma = ","; } - if (opts->verbose > 1 && strlen(debug_cconfig)) { - pos += (uint32_t)snprintf( + if (opts->verbose > 1 && strlen(debug_cconfig) != 0) { + pos += (size_t)snprintf( cc_buf + pos, req_len - pos, "%s%s", - append_comma ? "," : "", debug_cconfig); - append_comma = true; + append_comma, debug_cconfig); + append_comma = ","; } - if (strlen(cc_buf) && (ret = + if (strlen(cc_buf) != 0 && (ret = config_opt_name_value(wtperf, "conn_config", cc_buf)) != 0) goto err; } - if ((opts->verbose > 1 && strlen(debug_tconfig)) || opts->index || + if ((opts->verbose > 1 && strlen(debug_tconfig) != 0) || opts->index || user_tconfig != NULL || wtperf->compress_table != NULL) { - bool append_comma; - uint32_t pos; - - append_comma = false; - pos = 0; req_len = 20; - req_len += (wtperf->compress_table != NULL ? - strlen(wtperf->compress_table) : 0); - req_len += (opts->index ? strlen(INDEX_COL_NAMES) : 0); - req_len += (user_tconfig != NULL ? strlen(user_tconfig) : 0); - req_len += (debug_tconfig != NULL ? strlen(debug_tconfig) : 0); + req_len += wtperf->compress_table != NULL ? + strlen(wtperf->compress_table) : 0; + req_len += opts->index ? strlen(INDEX_COL_NAMES) : 0; + req_len += user_tconfig != NULL ? strlen(user_tconfig) : 0; + req_len += debug_tconfig != NULL ? strlen(debug_tconfig) : 0; tc_buf = dmalloc(req_len); + pos = 0; + append_comma = ""; if (wtperf->compress_table != NULL && - strlen(wtperf->compress_table)) { - pos += (uint32_t)snprintf( + strlen(wtperf->compress_table) != 0) { + pos += (size_t)snprintf( tc_buf + pos, req_len - pos, "%s%s", - append_comma ? "," : "", wtperf->compress_table); - append_comma = true; + append_comma, wtperf->compress_table); + append_comma = ","; } if (opts->index) { - pos += (uint32_t)snprintf( + pos += (size_t)snprintf( tc_buf + pos, req_len - pos, "%s%s", - append_comma ? 
"," : "", INDEX_COL_NAMES); - append_comma = true; + append_comma, INDEX_COL_NAMES); + append_comma = ","; } - if (user_tconfig != NULL && strlen(user_tconfig)) { - pos += (uint32_t)snprintf( + if (user_tconfig != NULL && strlen(user_tconfig) != 0) { + pos += (size_t)snprintf( tc_buf + pos, req_len - pos, "%s%s", - append_comma ? "," : "", user_tconfig); - append_comma = true; + append_comma, user_tconfig); + append_comma = ","; } - if (opts->verbose > 1 && strlen(debug_tconfig)) { - pos += (uint32_t)snprintf( + if (opts->verbose > 1 && strlen(debug_tconfig) != 0) { + pos += (size_t)snprintf( tc_buf + pos, req_len - pos, "%s%s", - append_comma ? "," : "", debug_tconfig); - append_comma = true; + append_comma, debug_tconfig); + append_comma = ","; } - if (strlen(tc_buf) && (ret = + if (strlen(tc_buf) != 0 && (ret = config_opt_name_value(wtperf, "table_config", tc_buf)) != 0) goto err; } diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c index a6e4d9801e5..660ef0cca67 100644 --- a/test/recovery/random-abort.c +++ b/test/recovery/random-abort.c @@ -31,7 +31,7 @@ #include #include -static char home[512]; /* Program working dir */ +static char home[1024]; /* Program working dir */ static const char *progname; /* Program name */ /* * These two names for the URI and file system must be maintained in tandem. @@ -227,7 +227,7 @@ main(int argc, char *argv[]) pid_t pid; bool fatal, rand_th, rand_time, verify_only; const char *working_dir; - char fname[64], kname[64]; + char fname[64], kname[64], statname[1024]; if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) progname = argv[0]; @@ -268,7 +268,7 @@ main(int argc, char *argv[]) if (argc != 0) usage(); - testutil_work_dir_from_path(home, 512, working_dir); + testutil_work_dir_from_path(home, sizeof(home), working_dir); /* * If the user wants to verify they need to tell us how many threads * there were so we can find the old record files. 
@@ -316,8 +316,8 @@ main(int argc, char *argv[]) * still exists in case the child aborts for some reason we * don't stay in this loop forever. */ - snprintf(fname, sizeof(fname), "%s/%s", home, fs_main); - while (stat(fname, &sb) != 0 && kill(pid, 0) == 0) + snprintf(statname, sizeof(statname), "%s/%s", home, fs_main); + while (stat(statname, &sb) != 0 && kill(pid, 0) == 0) sleep(1); sleep(timeout); @@ -352,11 +352,8 @@ main(int argc, char *argv[]) for (i = 0; i < nth; ++i) { middle = 0; snprintf(fname, sizeof(fname), RECORDS_FILE, i); - if ((fp = fopen(fname, "r")) == NULL) { - fprintf(stderr, - "Failed to open %s. i %" PRIu32 "\n", fname, i); - testutil_die(errno, "fopen"); - } + if ((fp = fopen(fname, "r")) == NULL) + testutil_die(errno, "fopen: %s", fname); /* * For every key in the saved file, verify that the key exists diff --git a/test/recovery/truncated-log.c b/test/recovery/truncated-log.c index c265263d44c..6a142b8e710 100644 --- a/test/recovery/truncated-log.c +++ b/test/recovery/truncated-log.c @@ -35,7 +35,7 @@ #define snprintf _snprintf #endif -static char home[512]; /* Program working dir */ +static char home[1024]; /* Program working dir */ static const char *progname; /* Program name */ static const char * const uri = "table:main"; @@ -290,7 +290,7 @@ main(int argc, char *argv[]) if (argc != 0) usage(); - testutil_work_dir_from_path(home, 512, working_dir); + testutil_work_dir_from_path(home, sizeof(home), working_dir); testutil_make_work_dir(home); /* -- cgit v1.2.1 From 45777eb7682e4bbed46be555ad667101775a160c Mon Sep 17 00:00:00 2001 From: sueloverso Date: Thu, 19 Jan 2017 18:59:53 -0500 Subject: WT-3105 Fix the thread group usage on eviction reconfigure and add test. 
(#3252) --- src/conn/conn_cache.c | 3 +-- src/include/connection.h | 1 - test/suite/test_reconfig01.py | 12 ++++++++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c index 9b07b46abcd..2b0e5081f04 100644 --- a/src/conn/conn_cache.c +++ b/src/conn/conn_cache.c @@ -144,8 +144,7 @@ __wt_cache_config(WT_SESSION_IMPL *session, bool reconfigure, const char *cfg[]) WT_RET(__wt_thread_group_resize( session, &conn->evict_threads, conn->evict_threads_min, - WT_MAX(conn->evict_threads_min, - WT_MIN(conn->evict_threads_max, EVICT_GROUP_INCR)), + conn->evict_threads_max, WT_THREAD_CAN_WAIT | WT_THREAD_PANIC_FAIL)); return (0); diff --git a/src/include/connection.h b/src/include/connection.h index 7d2b78e9f66..64ac4271db1 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -301,7 +301,6 @@ struct __wt_connection_impl { uint32_t evict_threads_max;/* Max eviction threads */ uint32_t evict_threads_min;/* Min eviction threads */ -#define EVICT_GROUP_INCR 4 /* Evict group size increased in batches */ uint32_t evict_tune_datapts_needed;/* Data needed to tune */ struct timespec evict_tune_last_action_time;/* Time of last action */ struct timespec evict_tune_last_time; /* Time of last check */ diff --git a/test/suite/test_reconfig01.py b/test/suite/test_reconfig01.py index e76becac76a..cbc8bca5740 100644 --- a/test/suite/test_reconfig01.py +++ b/test/suite/test_reconfig01.py @@ -64,6 +64,18 @@ class test_reconfig01(wttest.WiredTigerTestCase): # same ops_max of 512 and thread of 8. self.conn.reconfigure("async=(enabled=true)") + def test_reconfig_eviction(self): + # Increase the max number of running threads (default 8). + self.conn.reconfigure("eviction=(threads_max=10)") + # Increase the min number of running threads (default 1). + self.conn.reconfigure("eviction=(threads_min=5)") + # Decrease the max number of running threads. 
+ self.conn.reconfigure("eviction=(threads_max=7)") + # Decrease the min number of running threads. + self.conn.reconfigure("eviction=(threads_min=2)") + # Set min and max the same. + self.conn.reconfigure("eviction=(threads_min=6,threads_max=6)") + def test_reconfig_lsm_manager(self): # We create and populate a tiny LSM so that we can start off with # the LSM threads running and change the numbers of threads. -- cgit v1.2.1 From 3ac01b8a147ed5652199c0e577f3300bf4d78a57 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 20 Jan 2017 15:48:10 -0500 Subject: Add a verbose message if we don't find any log files, (#3245) user pointed us at the wrong directory. --- src/log/log.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/log/log.c b/src/log/log.c index 74c5442d405..da500a74e87 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -1655,10 +1655,7 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, WT_RET(__log_get_files(session, WT_LOG_FILENAME, &logfiles, &logcount)); if (logcount == 0) - /* - * Return it is not supported if none don't exist. - */ - return (ENOTSUP); + WT_RET_MSG(session, ENOTSUP, "no log files found"); for (i = 0; i < logcount; i++) { WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum)); -- cgit v1.2.1 From 573bc1a8027e21176c1f3e27483b0abc719131a0 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sun, 22 Jan 2017 17:38:06 -0500 Subject: Set the database home and configure error handling before (#3256) loading extensions, custom filesystems (for example) needs to know the database home. 
--- src/conn/conn_api.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 50617240d38..f691a76b1f2 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -2175,6 +2175,15 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, if (cval.val) F_SET(conn, WT_CONN_READONLY); + /* Configure error messages so we get them right early. */ + WT_ERR(__wt_config_gets(session, cfg, "error_prefix", &cval)); + if (cval.len != 0) + WT_ERR(__wt_strndup( + session, cval.str, cval.len, &conn->error_prefix)); + + /* Set the database home so extensions have access to it. */ + WT_ERR(__conn_home(session, home, cfg)); + /* * Load early extensions before doing further initialization (one early * extension is to configure a file system). @@ -2198,6 +2207,9 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR( __conn_chk_file_system(session, F_ISSET(conn, WT_CONN_READONLY))); + /* Make sure no other thread of control already owns this database. */ + WT_ERR(__conn_single(session, cfg)); + /* * Capture the config_base setting file for later use. Again, if the * application doesn't want us to read the base configuration file, @@ -2207,18 +2219,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR(__wt_config_gets(session, cfg, "config_base", &cval)); config_base_set = cval.val != 0; - /* Configure error messages so we get them right early. */ - WT_ERR(__wt_config_gets(session, cfg, "error_prefix", &cval)); - if (cval.len != 0) - WT_ERR(__wt_strndup( - session, cval.str, cval.len, &conn->error_prefix)); - - /* Get the database home. */ - WT_ERR(__conn_home(session, home, cfg)); - - /* Make sure no other thread of control already owns this database. 
*/ - WT_ERR(__conn_single(session, cfg)); - /* * Build the real configuration stack, in the following order (where * later entries override earlier entries): -- cgit v1.2.1 From bf8de9767982da9ae0f1542f3744c8aa8544fb82 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sun, 22 Jan 2017 17:56:29 -0500 Subject: Coverity 1369053: assigning values that are never subsequently used. (#3257) --- bench/wtperf/wtperf.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 91cedee8328..baa259f8817 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -2551,12 +2551,10 @@ main(int argc, char *argv[]) append_comma, user_cconfig); append_comma = ","; } - if (opts->verbose > 1 && strlen(debug_cconfig) != 0) { + if (opts->verbose > 1 && strlen(debug_cconfig) != 0) pos += (size_t)snprintf( cc_buf + pos, req_len - pos, "%s%s", append_comma, debug_cconfig); - append_comma = ","; - } if (strlen(cc_buf) != 0 && (ret = config_opt_name_value(wtperf, "conn_config", cc_buf)) != 0) @@ -2593,12 +2591,10 @@ main(int argc, char *argv[]) append_comma, user_tconfig); append_comma = ","; } - if (opts->verbose > 1 && strlen(debug_tconfig) != 0) { + if (opts->verbose > 1 && strlen(debug_tconfig) != 0) pos += (size_t)snprintf( tc_buf + pos, req_len - pos, "%s%s", append_comma, debug_tconfig); - append_comma = ","; - } if (strlen(tc_buf) != 0 && (ret = config_opt_name_value(wtperf, "table_config", tc_buf)) != 0) -- cgit v1.2.1 From 2764dd76aebbf6b71b61bf574b01a8028526731d Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Mon, 23 Jan 2017 00:19:30 -0500 Subject: WT-3120 Fix ordering problem in connection_close for custom filesystem loaded via shared lib (#3239) Also add fail_fs extension, as well as a simple test for it. 
--- build_posix/Make.subdirs | 1 + dist/s_void | 5 + ext/test/fail_fs/Makefile.am | 9 + ext/test/fail_fs/fail_fs.c | 703 ++++++++++++++++++++++++++++++++++++++ src/conn/conn_handle.c | 11 +- src/conn/conn_open.c | 25 +- src/include/extern.h | 2 +- test/csuite/Makefile.am | 3 + test/csuite/wt3120_filesys/main.c | 98 ++++++ 9 files changed, 837 insertions(+), 20 deletions(-) create mode 100644 ext/test/fail_fs/Makefile.am create mode 100644 ext/test/fail_fs/fail_fs.c create mode 100644 test/csuite/wt3120_filesys/main.c diff --git a/build_posix/Make.subdirs b/build_posix/Make.subdirs index 01f23dcbbc1..4ecec37ca6c 100644 --- a/build_posix/Make.subdirs +++ b/build_posix/Make.subdirs @@ -17,6 +17,7 @@ ext/encryptors/nop ext/encryptors/rotn ext/extractors/csv ext/test/kvs_bdb HAVE_BERKELEY_DB +ext/test/fail_fs . api/leveldb LEVELDB examples/c diff --git a/dist/s_void b/dist/s_void index 025f6d4c7eb..4a6b4ad91a2 100755 --- a/dist/s_void +++ b/dist/s_void @@ -78,6 +78,11 @@ func_ok() -e '/int demo_file_sync$/d' \ -e '/int demo_fs_directory_list_free$/d' \ -e '/int demo_fs_exist$/d' \ + -e '/int fail_file_lock$/d' \ + -e '/int fail_file_sync$/d' \ + -e '/int fail_fs_directory_list_free$/d' \ + -e '/int fail_fs_exist$/d' \ + -e '/int fail_fs_terminate$/d' \ -e '/int handle_message$/d' \ -e '/int handle_progress$/d' \ -e '/int helium_cursor_reset$/d' \ diff --git a/ext/test/fail_fs/Makefile.am b/ext/test/fail_fs/Makefile.am new file mode 100644 index 00000000000..f31f5395cd1 --- /dev/null +++ b/ext/test/fail_fs/Makefile.am @@ -0,0 +1,9 @@ +AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include + +noinst_LTLIBRARIES = libwiredtiger_fail_fs.la +libwiredtiger_fail_fs_la_SOURCES = fail_fs.c + +# libtool hack: noinst_LTLIBRARIES turns off building shared libraries as well +# as installation, it will only build static libraries. As far as I can tell, +# the "approved" libtool way to turn them back on is by adding -rpath. 
+libwiredtiger_fail_fs_la_LDFLAGS = -avoid-version -module -rpath /nowhere diff --git a/ext/test/fail_fs/fail_fs.c b/ext/test/fail_fs/fail_fs.c new file mode 100644 index 00000000000..e2538023a2c --- /dev/null +++ b/ext/test/fail_fs/fail_fs.c @@ -0,0 +1,703 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "queue.h" + +#define FAIL_FS_GIGABYTE (1024 * 1024 * 1024) + +/* + * A "fail file system", that is, a file system extension that fails when we + * want it to. This is only used in test frameworks, this fact allows us + * to simplify some error paths. 
+ */
+typedef struct {
+	/* First member: the code casts between this and WT_FILE_SYSTEM. */
+	WT_FILE_SYSTEM iface;
+	/*
+	 * WiredTiger performs schema and I/O operations in parallel, all file
+	 * system and file handle access must be thread-safe. This extension
+	 * uses a single, global file system lock.
+	 */
+	pthread_rwlock_t lock;			/* Lock */
+	int64_t read_ops;			/* Read calls seen so far */
+	int64_t write_ops;			/* Write calls seen so far */
+	/*
+	 * When non-zero, every read (write) call whose running count is a
+	 * multiple of the value fails with EIO.
+	 */
+	int64_t allow_reads;
+	int64_t allow_writes;
+	/* Queue of file handles */
+	TAILQ_HEAD(fail_file_handle_qh, fail_file_handle) fileq;
+	WT_EXTENSION_API *wtext;		/* Extension functions */
+} FAIL_FILE_SYSTEM;
+
+typedef struct fail_file_handle {
+	/* First member: the code casts between this and WT_FILE_HANDLE. */
+	WT_FILE_HANDLE iface;
+
+	/*
+	 * Track the system file descriptor for each file.
+	 */
+	FAIL_FILE_SYSTEM *fail_fs;		/* Enclosing file system */
+	TAILQ_ENTRY(fail_file_handle) q;	/* Queue of handles */
+	int fd;					/* System file descriptor */
+} FAIL_FILE_HANDLE;
+
+static int fail_file_close(WT_FILE_HANDLE *, WT_SESSION *);
+static void fail_file_handle_remove(WT_SESSION *, FAIL_FILE_HANDLE *);
+static int fail_file_lock(WT_FILE_HANDLE *, WT_SESSION *, bool);
+static int fail_file_read(
+    WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, void *);
+static int fail_file_size(
+    WT_FILE_HANDLE *, WT_SESSION *, wt_off_t *);
+static int fail_file_sync(WT_FILE_HANDLE *, WT_SESSION *);
+static int fail_file_truncate(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t);
+static int fail_file_write(
+    WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, const void *);
+static bool fail_fs_arg(
+    const char *match, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value,
+    int64_t *argp);
+static int fail_fs_directory_list(WT_FILE_SYSTEM *, WT_SESSION *,
+    const char *, const char *, char ***, uint32_t *);
+static int fail_fs_directory_list_free(
+    WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t);
+static int fail_fs_exist(WT_FILE_SYSTEM *, WT_SESSION *, const char *, bool *);
+static int fail_fs_open(WT_FILE_SYSTEM *, WT_SESSION *,
+    const char *, WT_FS_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **);
+static int fail_fs_remove(
+    WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t);
+static int fail_fs_rename(
+    WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t);
+static int fail_fs_size(
+    WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *);
+static int fail_fs_terminate(WT_FILE_SYSTEM *, WT_SESSION *);
+
+/*
+ * We use pthread functions for portable locking.
+ * Assert on errors for simplicity. The pthread calls are made outside of the
+ * assert expression: assert expands to nothing when NDEBUG is defined, and
+ * the lock operations must still happen in that build.
+ */
+static void
+fail_fs_allocate_lock(pthread_rwlock_t *lockp)
+{
+	int ret;
+
+	ret = pthread_rwlock_init(lockp, NULL);
+	assert(ret == 0);
+	(void)ret;				/* Unused if NDEBUG */
+}
+
+static void
+fail_fs_destroy_lock(pthread_rwlock_t *lockp)
+{
+	int ret;
+
+	ret = pthread_rwlock_destroy(lockp);
+	assert(ret == 0);
+	(void)ret;				/* Unused if NDEBUG */
+}
+
+static void
+fail_fs_lock(pthread_rwlock_t *lockp)
+{
+	int ret;
+
+	ret = pthread_rwlock_wrlock(lockp);
+	assert(ret == 0);
+	(void)ret;				/* Unused if NDEBUG */
+}
+
+static void
+fail_fs_unlock(pthread_rwlock_t *lockp)
+{
+	int ret;
+
+	ret = pthread_rwlock_unlock(lockp);
+	assert(ret == 0);
+	(void)ret;				/* Unused if NDEBUG */
+}
+
+/*
+ * fail_file_close --
+ *	POSIX close. Closes the descriptor, then unlinks the handle from the
+ *	file system's queue and frees it, the handle must not be used again.
+ *	Returns EINVAL if the descriptor is already closed, the errno value
+ *	if close fails, 0 otherwise.
+ */
+static int
+fail_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *session)
+{
+	FAIL_FILE_HANDLE *fail_fh;
+	FAIL_FILE_SYSTEM *fail_fs;
+	int ret;
+
+	fail_fh = (FAIL_FILE_HANDLE *)file_handle;
+	fail_fs = fail_fh->fail_fs;
+
+	if (fail_fh->fd < 0)
+		return (EINVAL);
+
+	/* Report the errno value, not close's -1 return. */
+	ret = close(fail_fh->fd) == 0 ? 0 : errno;
+	fail_fh->fd = -1;
+
+	/*
+	 * Open and directory-list update and walk the handle queue under the
+	 * file system lock, hold it while unlinking the handle as well.
+	 */
+	fail_fs_lock(&fail_fs->lock);
+	fail_file_handle_remove(session, fail_fh);
+	fail_fs_unlock(&fail_fs->lock);
+	return (ret);
+}
+
+/*
+ * fail_file_handle_remove --
+ *	Destroy an in-memory file handle. Should only happen on remove or
+ *	shutdown. Does no locking of its own. A descriptor that is still open
+ *	(the shutdown path) is closed so it is not leaked.
+ */
+static void
+fail_file_handle_remove(WT_SESSION *session, FAIL_FILE_HANDLE *fail_fh)
+{
+	FAIL_FILE_SYSTEM *fail_fs;
+
+	(void)session;				/* Unused */
+	fail_fs = fail_fh->fail_fs;
+
+	TAILQ_REMOVE(&fail_fs->fileq, fail_fh, q);
+
+	/* The close path sets the descriptor to -1 before calling us. */
+	if (fail_fh->fd >= 0) {
+		(void)close(fail_fh->fd);
+		fail_fh->fd = -1;
+	}
+
+	free(fail_fh->iface.name);
+	free(fail_fh);
+}
+
+/*
+ * fail_file_lock --
+ *	Lock/unlock a file.
+ */
+static int
+fail_file_lock(WT_FILE_HANDLE *file_handle, WT_SESSION *session, bool lock)
+{
+	/* Locks are always granted. */
+	(void)file_handle;			/* Unused */
+	(void)session;				/* Unused */
+	(void)lock;				/* Unused */
+
+	return (0);
+}
+
+/*
+ * fail_file_read --
+ *	POSIX pread.
+ */
+static int
+fail_file_read(WT_FILE_HANDLE *file_handle,
+    WT_SESSION *session, wt_off_t offset, size_t len, void *buf)
+{
+	FAIL_FILE_HANDLE *fail_fh;
+	FAIL_FILE_SYSTEM *fail_fs;
+	WT_EXTENSION_API *wtext;
+	int64_t read_ops;
+	int ret;
+	size_t chunk;
+	ssize_t nr;
+	uint8_t *addr;
+
+	fail_fh = (FAIL_FILE_HANDLE *)file_handle;
+	fail_fs = fail_fh->fail_fs;
+	wtext = fail_fs->wtext;
+	ret = 0;
+
+	/* The operation counter is shared by every handle, count locked. */
+	fail_fs_lock(&fail_fs->lock);
+	read_ops = ++fail_fs->read_ops;
+	fail_fs_unlock(&fail_fs->lock);
+
+	/* Fail every allow_reads'th read, zero disables the failures. */
+	if (fail_fs->allow_reads != 0 && read_ops % fail_fs->allow_reads == 0) {
+		(void)wtext->msg_printf(wtext, session,
+		    "fail_fs: %s: simulated failure after %" PRId64
+		    " reads\n", fail_fh->iface.name, read_ops);
+		return (EIO);
+	}
+
+	/* Break reads larger than 1GB into 1GB chunks. */
+	for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
+		chunk = (len < FAIL_FS_GIGABYTE) ? len : FAIL_FS_GIGABYTE;
+		if ((nr = pread(fail_fh->fd, addr, chunk, offset)) <= 0) {
+			/*
+			 * A zero return is an unexpected end-of-file and
+			 * does not set errno. Save the error code before
+			 * logging, the logging call may change errno, and
+			 * report that code rather than the byte count.
+			 */
+			ret = (nr == 0 ? WT_ERROR : errno);
+			(void)wtext->err_printf(wtext, session,
+			    "%s: handle-read: failed to read %" PRIu64
+			    " bytes at offset %" PRIu64 ": %s",
+			    fail_fh->iface.name, (uint64_t)len,
+			    (uint64_t)offset,
+			    wtext->strerror(wtext, NULL, ret));
+			break;
+		}
+	}
+	return (ret);
+}
+
+/*
+ * fail_file_size --
+ *	Get the size of a file in bytes, by file handle. Returns 0 and sets
+ *	the size on success, the errno value if fstat fails.
+ */
+static int
+fail_file_size(
+    WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t *sizep)
+{
+	FAIL_FILE_HANDLE *fail_fh;
+	struct stat statbuf;
+
+	(void)session;				/* Unused */
+
+	fail_fh = (FAIL_FILE_HANDLE *)file_handle;
+
+	/* Report the errno value, not fstat's -1 return. */
+	if (fstat(fail_fh->fd, &statbuf) != 0)
+		return (errno);
+	*sizep = statbuf.st_size;
+	return (0);
+}
+
+/*
+ * fail_file_sync --
+ *	Ensure the content of the file is stable. This is a no-op in the
+ *	fail file system, nothing is flushed.
+ */
+static int
+fail_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *session)
+{
+	(void)file_handle;			/* Unused */
+	(void)session;				/* Unused */
+
+	return (0);
+}
+
+/*
+ * fail_file_truncate --
+ *	POSIX ftruncate. Returns 0 on success, the errno value on failure.
+ */
+static int
+fail_file_truncate(
+    WT_FILE_HANDLE *file_handle, WT_SESSION *session, wt_off_t offset)
+{
+	FAIL_FILE_HANDLE *fail_fh;
+
+	(void)session;				/* Unused */
+
+	fail_fh = (FAIL_FILE_HANDLE *)file_handle;
+
+	/* Report the errno value, not ftruncate's -1 return. */
+	return (ftruncate(fail_fh->fd, offset) == 0 ? 0 : errno);
+}
+
+/*
+ * fail_file_write --
+ *	POSIX pwrite. Returns 0 on success, EIO for a simulated failure,
+ *	WT_ERROR if pwrite wrote nothing, otherwise the errno value.
+ */
+static int
+fail_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *session,
+    wt_off_t offset, size_t len, const void *buf)
+{
+	FAIL_FILE_HANDLE *fail_fh;
+	FAIL_FILE_SYSTEM *fail_fs;
+	WT_EXTENSION_API *wtext;
+	int64_t write_ops;
+	int ret;
+	size_t chunk;
+	ssize_t nr;
+	const uint8_t *addr;
+
+	fail_fh = (FAIL_FILE_HANDLE *)file_handle;
+	fail_fs = fail_fh->fail_fs;
+	wtext = fail_fs->wtext;
+	ret = 0;
+
+	/* The operation counter is shared by every handle, count locked. */
+	fail_fs_lock(&fail_fs->lock);
+	write_ops = ++fail_fs->write_ops;
+	fail_fs_unlock(&fail_fs->lock);
+
+	/* Fail every allow_writes'th write, zero disables the failures. */
+	if (fail_fs->allow_writes != 0 &&
+	    write_ops % fail_fs->allow_writes == 0) {
+		(void)wtext->msg_printf(wtext, session,
+		    "fail_fs: %s: simulated failure after %" PRId64
+		    " writes\n", fail_fh->iface.name, write_ops);
+		return (EIO);
+	}
+
+	/* Break writes larger than 1GB into 1GB chunks. */
+	for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
+		chunk = (len < FAIL_FS_GIGABYTE) ? len : FAIL_FS_GIGABYTE;
+		if ((nr = pwrite(fail_fh->fd, addr, chunk, offset)) <= 0) {
+			/*
+			 * A zero return does not set errno. Save the error
+			 * code before logging, the logging call may change
+			 * errno, and report that code rather than the byte
+			 * count.
+			 */
+			ret = (nr == 0 ? WT_ERROR : errno);
+			(void)wtext->err_printf(wtext, session,
+			    "%s: handle-write: failed to write %" PRIu64
+			    " bytes at offset %" PRIu64 ": %s",
+			    fail_fh->iface.name, (uint64_t)len,
+			    (uint64_t)offset,
+			    wtext->strerror(wtext, NULL, ret));
+			break;
+		}
+	}
+	return (ret);
+}
+
+/*
+ * fail_fs_arg --
+ *	If the key matches, return the value interpreted as an integer.
+ */
+static bool
+fail_fs_arg(const char *match, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value,
+    int64_t *argp)
+{
+	char *s;
+	int64_t result;
+
+	/* The key is a length-counted slice, match it in full. */
+	if (strncmp(match, key->str, key->len) == 0 &&
+	    match[key->len] == '\0') {
+		/*
+		 * Accept the value only if the whole of it parsed as a
+		 * decimal number.
+		 * NOTE(review): strtoll stops at the first non-digit, not at
+		 * value->len; presumably the configuration string always has
+		 * a delimiter or nul after the value -- confirm.
+		 */
+		s = (char *)value->str;
+		result = strtoll(s, &s, 10);
+		if ((size_t)(s - (char *)value->str) == value->len) {
+			*argp = result;
+			return (true);
+		}
+	}
+	return (false);
+}
+
+/*
+ * fail_fs_directory_list --
+ *	Return a list of files in a given sub-directory. Only files with an
+ *	open handle in this file system are listed. On success the list and
+ *	its length are returned in dirlistp and countp, the caller releases
+ *	the list with fail_fs_directory_list_free. Returns ENOMEM, with the
+ *	outputs left NULL and zero, if memory cannot be allocated.
+ */
+static int
+fail_fs_directory_list(WT_FILE_SYSTEM *file_system,
+    WT_SESSION *session, const char *directory,
+    const char *prefix, char ***dirlistp, uint32_t *countp)
+{
+	FAIL_FILE_HANDLE *fail_fh;
+	FAIL_FILE_SYSTEM *fail_fs;
+	size_t len, prefix_len;
+	uint32_t allocated, count;
+	int ret;
+	char *name, **entries, **new_entries;
+
+	(void)session;				/* Unused */
+
+	fail_fs = (FAIL_FILE_SYSTEM *)file_system;
+	ret = 0;
+	*dirlistp = NULL;
+	*countp = 0;
+
+	entries = NULL;
+	allocated = count = 0;
+	len = strlen(directory);
+	prefix_len = prefix == NULL ? 0 : strlen(prefix);
+
+	fail_fs_lock(&fail_fs->lock);
+	TAILQ_FOREACH(fail_fh, &fail_fs->fileq, q) {
+		name = fail_fh->iface.name;
+		if (strncmp(name, directory, len) != 0 ||
+		    (prefix != NULL && strncmp(name, prefix, prefix_len) != 0))
+			continue;
+
+		/*
+		 * Increase the list size in groups of 10, it doesn't
+		 * matter if the list is a bit longer than necessary.
+		 */
+		if (count >= allocated) {
+			/*
+			 * Keep the old list reachable until the resize is
+			 * known to have worked, so the error path can free
+			 * it and the names it holds.
+			 */
+			new_entries = realloc(
+			    entries, (allocated + 10) * sizeof(char *));
+			if (new_entries == NULL) {
+				ret = ENOMEM;
+				goto err;
+			}
+			entries = new_entries;
+
+			/*
+			 * Clear the new slots. The offset is in elements,
+			 * pointer arithmetic already scales by the size of
+			 * a pointer.
+			 */
+			memset(entries + allocated, 0, 10 * sizeof(char *));
+			allocated += 10;
+		}
+		if ((entries[count] = strdup(name)) == NULL) {
+			ret = ENOMEM;
+			goto err;
+		}
+		++count;
+	}
+
+	*dirlistp = entries;
+	*countp = count;
+
+err:	fail_fs_unlock(&fail_fs->lock);
+	if (ret == 0)
+		return (0);
+
+	/* Failure: discard the partial list, the outputs were cleared above. */
+	if (entries != NULL) {
+		while (count > 0)
+			free(entries[--count]);
+		free(entries);
+	}
+
+	return (ret);
+}
+
+/*
+ * fail_fs_directory_list_free --
+ *	Free memory allocated by fail_fs_directory_list.
+ */
+static int
+fail_fs_directory_list_free(WT_FILE_SYSTEM *file_system,
+    WT_SESSION *session, char **dirlist, uint32_t count)
+{
+	(void)file_system;			/* Unused */
+	(void)session;				/* Unused */
+
+	if (dirlist != NULL) {
+		while (count > 0)
+			free(dirlist[--count]);
+		free(dirlist);
+	}
+	return (0);
+}
+
+/*
+ * fail_fs_exist --
+ *	Return if the file exists.
+ */
+static int
+fail_fs_exist(WT_FILE_SYSTEM *file_system,
+    WT_SESSION *session, const char *name, bool *existp)
+{
+	(void)file_system;			/* Unused */
+	(void)session;				/* Unused */
+
+	/* Any access failure is reported as the file not existing. */
+	*existp = (access(name, 0) == 0);
+	return (0);
+}
+
+/*
+ * fail_fs_open --
+ *	fopen for the fail file system.
+ */
+static int
+fail_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
+    const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
+    WT_FILE_HANDLE **file_handlep)
+{
+	FAIL_FILE_HANDLE *fail_fh;
+	FAIL_FILE_SYSTEM *fail_fs;
+	WT_FILE_HANDLE *file_handle;
+	int fd, open_flags, ret;
+
+	(void)file_type;			/* Unused */
+	(void)session;				/* Unused */
+
+	*file_handlep = NULL;
+	ret = 0;
+	fail_fs = (FAIL_FILE_SYSTEM *)file_system;
+	fail_fh = NULL;
+	fd = -1;
+
+	fail_fs_lock(&fail_fs->lock);
+
+	/* Translate the WiredTiger open flags into POSIX open flags. */
+	open_flags = 0;
+	if ((flags & WT_FS_OPEN_CREATE) != 0)
+		open_flags |= O_CREAT;
+	if ((flags & WT_FS_OPEN_EXCLUSIVE) != 0)
+		open_flags |= O_EXCL;
+	if ((flags & WT_FS_OPEN_READONLY) != 0)
+		open_flags |= O_RDONLY;
+	else
+		open_flags |= O_RDWR;
+
+	/* Report the errno value, not open's -1 return. */
+	if ((fd = open(name, open_flags, 0666)) < 0) {
+		ret = errno;
+		goto err;
+	}
+
+	/* We create a handle structure for each open. */
+	if ((fail_fh = calloc(1, sizeof(FAIL_FILE_HANDLE))) == NULL) {
+		ret = ENOMEM;
+		goto err;
+	}
+
+	/* Initialize private information. */
+	fail_fh->fail_fs = fail_fs;
+	fail_fh->fd = fd;
+
+	/* Initialize public information. */
+	file_handle = (WT_FILE_HANDLE *)fail_fh;
+	if ((file_handle->name = strdup(name)) == NULL) {
+		ret = ENOMEM;
+		goto err;
+	}
+
+	/* Setup the function call table. */
+	file_handle->close = fail_file_close;
+	file_handle->fh_advise = NULL;
+	file_handle->fh_extend = NULL;
+	file_handle->fh_extend_nolock = NULL;
+	file_handle->fh_lock = fail_file_lock;
+	file_handle->fh_map = NULL;
+	file_handle->fh_map_discard = NULL;
+	file_handle->fh_map_preload = NULL;
+	file_handle->fh_unmap = NULL;
+	file_handle->fh_read = fail_file_read;
+	file_handle->fh_size = fail_file_size;
+	file_handle->fh_sync = fail_file_sync;
+	file_handle->fh_sync_nowait = NULL;
+	file_handle->fh_truncate = fail_file_truncate;
+	file_handle->fh_write = fail_file_write;
+
+	TAILQ_INSERT_HEAD(&fail_fs->fileq, fail_fh, q);
+
+	*file_handlep = file_handle;
+
+	if (0) {
+		/*
+		 * Failure: the handle was never queued or returned, release
+		 * the descriptor along with it so neither is leaked.
+		 */
+err:		if (fd >= 0)
+			(void)close(fd);
+		free(fail_fh);
+	}
+
+	fail_fs_unlock(&fail_fs->lock);
+	return (ret);
+}
+
+/*
+ * fail_fs_remove --
+ *	POSIX remove. Returns 0 on success, the errno value on failure.
+ */
+static int
+fail_fs_remove(WT_FILE_SYSTEM *file_system,
+    WT_SESSION *session, const char *name, uint32_t flags)
+{
+	(void)file_system;			/* Unused */
+	(void)session;				/* Unused */
+	(void)flags;				/* Unused */
+
+	/* Report the errno value, not unlink's -1 return. */
+	return (unlink(name) == 0 ? 0 : errno);
+}
+
+/*
+ * fail_fs_rename --
+ *	POSIX rename. Returns 0 on success, the errno value on failure.
+ */
+static int
+fail_fs_rename(WT_FILE_SYSTEM *file_system,
+    WT_SESSION *session, const char *from, const char *to, uint32_t flags)
+{
+	(void)file_system;			/* Unused */
+	(void)session;				/* Unused */
+	(void)flags;				/* Unused */
+
+	/* Report the errno value, not rename's -1 return. */
+	return (rename(from, to) == 0 ? 0 : errno);
+}
+
+/*
+ * fail_fs_size --
+ *	Get the size of a file in bytes, by file name.
+ */ +static int +fail_fs_size(WT_FILE_SYSTEM *file_system, + WT_SESSION *session, const char *name, wt_off_t *sizep) +{ + struct stat statbuf; + int ret; + + (void)file_system; /* Unused */ + (void)session; /* Unused */ + + ret = 0; + if ((ret = stat(name, &statbuf)) != 0) + return (ret); + *sizep = statbuf.st_size; + return (0); +} + +/* + * fail_fs_terminate -- + * Discard any resources on termination + */ +static int +fail_fs_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *session) +{ + FAIL_FILE_HANDLE *fail_fh; + FAIL_FILE_SYSTEM *fail_fs; + + fail_fs = (FAIL_FILE_SYSTEM *)file_system; + + while ((fail_fh = TAILQ_FIRST(&fail_fs->fileq)) != NULL) + fail_file_handle_remove(session, fail_fh); + + fail_fs_destroy_lock(&fail_fs->lock); + free(fail_fs); + + return (0); +} + +/* + * wiredtiger_extension_init -- + * WiredTiger fail filesystem extension. + */ +int +wiredtiger_extension_init(WT_CONNECTION *conn, WT_CONFIG_ARG *config) +{ + FAIL_FILE_SYSTEM *fail_fs; + WT_CONFIG_ITEM k, v; + WT_CONFIG_PARSER *config_parser; + WT_EXTENSION_API *wtext; + WT_FILE_SYSTEM *file_system; + int ret; + + ret = 0; + wtext = conn->get_extension_api(conn); + if ((fail_fs = calloc(1, sizeof(FAIL_FILE_SYSTEM))) == NULL) { + (void)wtext->err_printf(wtext, NULL, + "fail_file_system extension_init: %s", + wtext->strerror(wtext, NULL, ENOMEM)); + return (ENOMEM); + } + fail_fs->wtext = wtext; + file_system = (WT_FILE_SYSTEM *)fail_fs; + + /* Get any configuration values. 
*/ + if ((ret = wtext->config_parser_open_arg( + wtext, NULL, config, &config_parser)) != 0) { + (void)wtext->err_printf(wtext, NULL, + "WT_EXTENSION_API.config_parser_open: config: %s", + wtext->strerror(wtext, NULL, ret)); + goto err; + } + while ((ret = config_parser->next(config_parser, &k, &v)) == 0) { + if (fail_fs_arg("allow_writes", &k, &v, &fail_fs->allow_writes)) + continue; + if (fail_fs_arg("allow_reads", &k, &v, &fail_fs->allow_reads)) + continue; + + (void)wtext->err_printf(wtext, NULL, + "WT_CONFIG_PARSER.next: unexpected configuration " + "information: %.*s=%.*s: %s", + (int)k.len, k.str, (int)v.len, v.str, + wtext->strerror(wtext, NULL, ret)); + goto err; + } + if (ret != WT_NOTFOUND) { + (void)wtext->err_printf(wtext, NULL, + "WT_CONFIG_PARSER.next: config: %s", + wtext->strerror(wtext, NULL, ret)); + goto err; + } + if ((ret = config_parser->close(config_parser)) != 0) { + (void)wtext->err_printf(wtext, NULL, + "WT_CONFIG_PARSER.close: config: %s", + wtext->strerror(wtext, NULL, ret)); + goto err; + } + + fail_fs_allocate_lock(&fail_fs->lock); + /* Initialize the in-memory jump table. 
*/ + file_system->fs_directory_list = fail_fs_directory_list; + file_system->fs_directory_list_free = fail_fs_directory_list_free; + file_system->fs_exist = fail_fs_exist; + file_system->fs_open_file = fail_fs_open; + file_system->fs_remove = fail_fs_remove; + file_system->fs_rename = fail_fs_rename; + file_system->fs_size = fail_fs_size; + file_system->terminate = fail_fs_terminate; + if ((ret = conn->set_file_system(conn, file_system, NULL)) != 0) { + (void)wtext->err_printf(wtext, NULL, + "WT_CONNECTION.set_file_system: %s", + wtext->strerror(wtext, NULL, ret)); + goto err; + } + return (0); + +err: free(fail_fs); + return (ret); +} diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index 3f7fc9bb2a7..7203b75e4ae 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -109,16 +109,15 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) * __wt_connection_destroy -- * Destroy the connection's underlying WT_CONNECTION_IMPL structure. */ -int +void __wt_connection_destroy(WT_CONNECTION_IMPL *conn) { - WT_DECL_RET; WT_SESSION_IMPL *session; u_int i; /* Check there's something to destroy. */ if (conn == NULL) - return (0); + return; session = conn->default_session; @@ -149,11 +148,6 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_spin_destroy(session, &conn->page_lock[i]); __wt_free(session, conn->page_lock); - /* Destroy the file-system configuration. */ - if (conn->file_system != NULL && conn->file_system->terminate != NULL) - WT_TRET(conn->file_system->terminate( - conn->file_system, (WT_SESSION *)session)); - /* Free allocated memory. 
*/ __wt_free(session, conn->cfg); __wt_free(session, conn->home); @@ -162,5 +156,4 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_stat_connection_discard(session, conn); __wt_free(NULL, conn); - return (ret); } diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c index d4ace127bb2..f8029f2c728 100644 --- a/src/conn/conn_open.c +++ b/src/conn/conn_open.c @@ -159,15 +159,6 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) /* Discard transaction state. */ __wt_txn_global_destroy(session); - /* Close extensions, first calling any unload entry point. */ - while ((dlh = TAILQ_FIRST(&conn->dlhqh)) != NULL) { - TAILQ_REMOVE(&conn->dlhqh, dlh, q); - - if (dlh->terminate != NULL) - WT_TRET(dlh->terminate(wt_conn)); - WT_TRET(__wt_dlclose(session, dlh)); - } - /* Close the lock file, opening up the database to other connections. */ if (conn->lock_fh != NULL) WT_TRET(__wt_close(session, &conn->lock_fh)); @@ -199,8 +190,22 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) __wt_free(session, s->hazard); } + /* Destroy the file-system configuration. */ + if (conn->file_system != NULL && conn->file_system->terminate != NULL) + WT_TRET(conn->file_system->terminate( + conn->file_system, (WT_SESSION *)session)); + + /* Close extensions, first calling any unload entry point. */ + while ((dlh = TAILQ_FIRST(&conn->dlhqh)) != NULL) { + TAILQ_REMOVE(&conn->dlhqh, dlh, q); + + if (dlh->terminate != NULL) + WT_TRET(dlh->terminate(wt_conn)); + WT_TRET(__wt_dlclose(session, dlh)); + } + /* Destroy the handle. 
*/ - WT_TRET(__wt_connection_destroy(conn)); + __wt_connection_destroy(conn); return (ret); } diff --git a/src/include/extern.h b/src/include/extern.h index 566eb386c29..16b3c916b24 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -262,7 +262,7 @@ extern int __wt_conn_dhandle_close_all( WT_SESSION_IMPL *session, const char *ur extern int __wt_conn_dhandle_discard_single( WT_SESSION_IMPL *session, bool final, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_connection_destroy(WT_CONNECTION_IMPL *conn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_connection_destroy(WT_CONNECTION_IMPL *conn) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_logmgr_reconfig(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_truncate_files( WT_SESSION_IMPL *session, WT_CURSOR *cursor, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_log_wrlsn(WT_SESSION_IMPL *session, int *yield) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/test/csuite/Makefile.am b/test/csuite/Makefile.am index a96492c1e71..bcdbf120d67 100644 --- a/test/csuite/Makefile.am +++ b/test/csuite/Makefile.am @@ -40,6 +40,9 @@ noinst_PROGRAMS += test_wt2853_perf test_wt2999_join_extractor_SOURCES = wt2999_join_extractor/main.c noinst_PROGRAMS += test_wt2999_join_extractor +test_wt3120_filesys_SOURCES = 
wt3120_filesys/main.c +noinst_PROGRAMS += test_wt3120_filesys + # Run this during a "make check" smoke test. TESTS = $(noinst_PROGRAMS) LOG_COMPILER = $(TEST_WRAPPER) diff --git a/test/csuite/wt3120_filesys/main.c b/test/csuite/wt3120_filesys/main.c new file mode 100644 index 00000000000..abf660db046 --- /dev/null +++ b/test/csuite/wt3120_filesys/main.c @@ -0,0 +1,98 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +/* + * JIRA ticket reference: WT-3120 + * Test case description: A simple file system extension built into + * a shared library. + * Failure mode: Loading the file system and closing the connection + * is enough to evoke the failure. 
This test does slightly more + * than that. + */ + +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + WT_CURSOR *cursor; + WT_SESSION *session; + char *kstr, *vstr; + char buf[100]; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + snprintf(buf, sizeof(buf), + "create,extensions=" + "[\"../../ext/test/fail_fs/.libs/libwiredtiger_fail_fs.so\"]"); + testutil_check(wiredtiger_open(opts->home, NULL, buf, &opts->conn)); + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + testutil_check(session->create(session, opts->uri, + "key_format=S,value_format=S")); + + testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, + &cursor)); + cursor->set_key(cursor, "a"); + cursor->set_value(cursor, "0"); + testutil_check(cursor->insert(cursor)); + cursor->set_key(cursor, "b"); + cursor->set_value(cursor, "1"); + testutil_check(cursor->insert(cursor)); + testutil_check(cursor->close(cursor)); + testutil_check(session->close(session, NULL)); + + /* Force to disk and re-open. 
*/ + testutil_check(opts->conn->close(opts->conn, NULL)); + testutil_check(wiredtiger_open(opts->home, NULL, NULL, &opts->conn)); + + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, + &cursor)); + testutil_check(cursor->next(cursor)); + cursor->get_key(cursor, &kstr); + cursor->get_value(cursor, &vstr); + testutil_assert(strcmp(kstr, "a") == 0); + testutil_assert(strcmp(vstr, "0") == 0); + testutil_check(cursor->next(cursor)); + cursor->get_key(cursor, &kstr); + cursor->get_value(cursor, &vstr); + testutil_assert(strcmp(kstr, "b") == 0); + testutil_assert(strcmp(vstr, "1") == 0); + testutil_assert(cursor->next(cursor) == WT_NOTFOUND); + testutil_check(cursor->close(cursor)); + testutil_check(session->close(session, NULL)); + printf("Success\n"); + + testutil_cleanup(opts); + return (EXIT_SUCCESS); +} -- cgit v1.2.1 From 52171b4c668528c80d1e2084183899f294d4c797 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 23 Jan 2017 00:51:14 -0500 Subject: WT-3144 Print WT_REF instead of WT_REF.page in verbose/debugging output. 
(#3258) --- src/btree/bt_debug.c | 2 +- src/btree/bt_split.c | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index b62125e069d..a89eca230fd 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -652,7 +652,7 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref) page = ref->page; mod = page->modify; - WT_RET(ds->f(ds, "%p", (void *)page)); + WT_RET(ds->f(ds, "%p", (void *)ref)); switch (page->type) { case WT_PAGE_COL_INT: diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 6b0b8a08c02..7cfcd08f931 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -2086,8 +2086,7 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref) WT_PAGE *parent; bool hazard; - __wt_verbose( - session, WT_VERB_SPLIT, "%p: split-insert", (void *)ref->page); + __wt_verbose(session, WT_VERB_SPLIT, "%p: split-insert", (void *)ref); WT_RET(__split_internal_lock(session, ref, true, &parent, &hazard)); if ((ret = __split_insert(session, ref)) != 0) { @@ -2178,8 +2177,7 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing) WT_PAGE *parent; bool hazard; - __wt_verbose( - session, WT_VERB_SPLIT, "%p: split-multi", (void *)ref->page); + __wt_verbose(session, WT_VERB_SPLIT, "%p: split-multi", (void *)ref); WT_RET(__split_internal_lock(session, ref, false, &parent, &hazard)); if ((ret = __split_multi(session, ref, closing)) != 0 || closing) { @@ -2207,8 +2205,7 @@ __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref) WT_PAGE *parent; bool hazard; - __wt_verbose( - session, WT_VERB_SPLIT, "%p: reverse-split", (void *)ref->page); + __wt_verbose(session, WT_VERB_SPLIT, "%p: reverse-split", (void *)ref); WT_RET(__split_internal_lock(session, ref, false, &parent, &hazard)); ret = __split_parent(session, ref, NULL, 0, 0, false, true); @@ -2229,8 +2226,7 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi) page = ref->page; - __wt_verbose( - session, 
WT_VERB_SPLIT, "%p: split-rewrite", (void *)ref->page); + __wt_verbose(session, WT_VERB_SPLIT, "%p: split-rewrite", (void *)ref); /* * This isn't a split: a reconciliation failed because we couldn't write -- cgit v1.2.1 From 5e6ffcc7ef98a609e4bbc0ecfef58dade45de1d7 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 23 Jan 2017 00:53:47 -0500 Subject: WT-3144 Make it less likely for random lookups to return WT_NOTFOUND (#3259) There may be empty pages in the tree, and they're useless to us when trying to find random samples. If we don't find a non-empty page in "entries" random guesses, take the first non-empty page in the tree. If the search page contains nothing other than empty pages, restart from the root some number of times before giving up. --- src/btree/row_srch.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c index aa299a161da..5b3f1195784 100644 --- a/src/btree/row_srch.c +++ b/src/btree/row_srch.c @@ -792,9 +792,11 @@ __wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_PAGE *page; WT_PAGE_INDEX *pindex; WT_REF *current, *descent; + uint32_t i, entries, retry; btree = S2BT(session); current = NULL; + retry = 100; if (0) { restart: /* @@ -812,8 +814,32 @@ restart: /* break; WT_INTL_INDEX_GET(session, page, pindex); - descent = pindex->index[ - __wt_random(&session->rnd) % pindex->entries]; + entries = pindex->entries; + + /* + * There may be empty pages in the tree, and they're useless to + * us. If we don't find a non-empty page in "entries" random + * guesses, take the first non-empty page in the tree. If the + * search page contains nothing other than empty pages, restart + * from the root some number of times before giving up. 
+ */ + for (i = 0; i < entries; ++i) { + descent = + pindex->index[__wt_random(&session->rnd) % entries]; + if (descent->state != WT_REF_DELETED) + break; + } + if (i == entries) + for (i = 0; i < entries; ++i) { + descent = pindex->index[i]; + if (descent->state != WT_REF_DELETED) + break; + } + if (i == entries) { + if (--retry > 0) + goto restart; + return (WT_NOTFOUND); + } /* * Swap the current page for the child page. If the page splits -- cgit v1.2.1 From f214daa45a860021f107c498ddfd1328b6b3f517 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 23 Jan 2017 07:49:41 -0500 Subject: WT-3144 bug fix: random cursor returns not-found when descending to an empty page. clang 3.8 complains descent might be left uninitialized in some case. I don't think that's possible, but it's a simple change. --- src/btree/row_srch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c index 5b3f1195784..1c3d5ad5daa 100644 --- a/src/btree/row_srch.c +++ b/src/btree/row_srch.c @@ -823,6 +823,7 @@ restart: /* * search page contains nothing other than empty pages, restart * from the root some number of times before giving up. */ + descent = NULL; for (i = 0; i < entries; ++i) { descent = pindex->index[__wt_random(&session->rnd) % entries]; @@ -835,7 +836,7 @@ restart: /* if (descent->state != WT_REF_DELETED) break; } - if (i == entries) { + if (i == entries || descent == NULL) { if (--retry > 0) goto restart; return (WT_NOTFOUND); -- cgit v1.2.1 From b2ab33d476c657120c56ed31aa05f54557f010e0 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 23 Jan 2017 11:34:06 -0500 Subject: WT-3120 Fix ordering problem in connection_close for filesystem loaded in an extension (#3261) This commit represents fixes for Coverity errors, LeakSanitizer errors, and additional cleanup: * pread/pwrite return value is -1 on error, but the error is in errno. * Convert size_t and off_t to uintmax_t/PRIuMAX, not uint64_t/PRIu64. 
* Coverity ID 1369085 (#1 of 1): Extra sizeof expression (SIZEOF_MISMATCH) suspicious_pointer_arithmetic: Adding allocated * 8UL /* sizeof (char *) */ to pointer entries of type char ** is suspicious because adding an integral value to this pointer automatically scales that value by the size, 8 bytes, of the pointed-to type, char *. Most likely, the multiplication by sizeof (char *) in this expression is extraneous and should be eliminated. * CID 1369084 (#1 of 1): Resource leak (RESOURCE_LEAK) 9. overwrite_var: Overwriting handle ret in ret = 12 leaks the handle. * CID 1369083 (#1 of 1): Logically dead code (DEADCODE) dead_error_line: Execution cannot reach this statement: while (count > 0U) null: At condition entries != NULL, the value of entries must be NULL. dead_error_condition: The condition entries != NULL cannot be true. * Custom filesystems have to configure early-load, otherwise we'll have already configured a default filesystem by the time the extension is loaded. * Add early-load configuration to the wt3120_filesys test. * Add code to WiredTiger that fails if a custom filesystem is configured after we've already configured a default filesystem. --- examples/c/ex_file_system.c | 13 ++++++---- ext/test/fail_fs/fail_fs.c | 50 +++++++++++++++++++++++---------------- src/conn/conn_api.c | 10 ++++++++ test/csuite/wt3120_filesys/main.c | 7 +++--- 4 files changed, 51 insertions(+), 29 deletions(-) diff --git a/examples/c/ex_file_system.c b/examples/c/ex_file_system.c index 56869171558..e807ac54d3b 100644 --- a/examples/c/ex_file_system.c +++ b/examples/c/ex_file_system.c @@ -399,6 +399,7 @@ demo_fs_directory_list(WT_FILE_SYSTEM *file_system, uint32_t allocated, count; int ret = 0; char *name, **entries; + void *p; (void)session; /* Unused */ @@ -424,14 +425,16 @@ demo_fs_directory_list(WT_FILE_SYSTEM *file_system, * matter if the list is a bit longer than necessary. 
*/ if (count >= allocated) { - entries = realloc( - entries, (allocated + 10) * sizeof(char *)); - if (entries == NULL) { + p = realloc( + entries, (allocated + 10) * sizeof(*entries)); + if (p == NULL) { ret = ENOMEM; goto err; } - memset(entries + allocated * sizeof(char *), - 0, 10 * sizeof(char *)); + + entries = p; + memset(entries + allocated * sizeof(*entries), + 0, 10 * sizeof(*entries)); allocated += 10; } entries[count++] = strdup(name); diff --git a/ext/test/fail_fs/fail_fs.c b/ext/test/fail_fs/fail_fs.c index e2538023a2c..29d469768c5 100644 --- a/ext/test/fail_fs/fail_fs.c +++ b/ext/test/fail_fs/fail_fs.c @@ -224,10 +224,11 @@ fail_file_read(WT_FILE_HANDLE *file_handle, chunk = (len < FAIL_FS_GIGABYTE) ? len : FAIL_FS_GIGABYTE; if ((nr = pread(fail_fh->fd, addr, chunk, offset)) <= 0) { (void)wtext->err_printf(wtext, session, - "%s: handle-read: failed to read %" PRIu64 - " bytes at offset %" PRIu64 ": %s", - fail_fh->iface.name, (uint64_t)len, - (uint64_t)offset, wtext->strerror(wtext, NULL, nr)); + "%s: handle-read: failed to read %" PRIuMAX + " bytes at offset %" PRIuMAX ": %s", + fail_fh->iface.name, + (uintmax_t)len, (uintmax_t)offset, + wtext->strerror(wtext, NULL, errno)); ret = (nr == 0 ? WT_ERROR : errno); break; } @@ -327,10 +328,11 @@ fail_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *session, chunk = (len < FAIL_FS_GIGABYTE) ? len : FAIL_FS_GIGABYTE; if ((nr = pwrite(fail_fh->fd, addr, chunk, offset)) <= 0) { (void)wtext->err_printf(wtext, session, - "%s: handle-write: failed to write %" PRIu64 - " bytes at offset %" PRIu64 ": %s", - fail_fh->iface.name, (uint64_t)len, - (uint64_t)offset, wtext->strerror(wtext, NULL, nr)); + "%s: handle-write: failed to write %" PRIuMAX + " bytes at offset %" PRIuMAX ": %s", + fail_fh->iface.name, + (uintmax_t)len, (uintmax_t)offset, + wtext->strerror(wtext, NULL, errno)); ret = (nr == 0 ? 
WT_ERROR : errno); break; } @@ -376,6 +378,7 @@ fail_fs_directory_list(WT_FILE_SYSTEM *file_system, uint32_t allocated, count; int ret; char *name, **entries; + void *p; (void)session; /* Unused */ @@ -401,14 +404,15 @@ fail_fs_directory_list(WT_FILE_SYSTEM *file_system, * matter if the list is a bit longer than necessary. */ if (count >= allocated) { - entries = realloc( - entries, (allocated + 10) * sizeof(char *)); - if (entries == NULL) { + p = realloc( + entries, (allocated + 10) * sizeof(*entries)); + if (p == NULL) { ret = ENOMEM; goto err; } - memset(entries + allocated * sizeof(char *), - 0, 10 * sizeof(char *)); + entries = p; + memset(entries + allocated * sizeof(*entries), + 0, 10 * sizeof(*entries)); allocated += 10; } entries[count++] = strdup(name); @@ -476,16 +480,17 @@ fail_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, FAIL_FILE_HANDLE *fail_fh; FAIL_FILE_SYSTEM *fail_fs; WT_FILE_HANDLE *file_handle; - int open_flags; - int ret; + int fd, open_flags, ret; (void)file_type; /* Unused */ (void)session; /* Unused */ *file_handlep = NULL; - ret = 0; - fail_fs = (FAIL_FILE_SYSTEM *)file_system; + fail_fh = NULL; + fail_fs = (FAIL_FILE_SYSTEM *)file_system; + fd = -1; + ret = 0; fail_fs_lock(&fail_fs->lock); @@ -499,8 +504,10 @@ fail_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, else open_flags |= O_RDWR; - if ((ret = open(name, open_flags, 0666)) < 0) + if ((fd = open(name, open_flags, 0666)) < 0) { + ret = errno; goto err; + } /* We create a handle structure for each open. */ if ((fail_fh = calloc(1, sizeof(FAIL_FILE_HANDLE))) == NULL) { @@ -510,8 +517,7 @@ fail_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, /* Initialize private information. */ fail_fh->fail_fs = fail_fs; - fail_fh->fd = ret; - ret = 0; + fail_fh->fd = fd; /* Initialize public information. 
*/ file_handle = (WT_FILE_HANDLE *)fail_fh; @@ -542,7 +548,9 @@ fail_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, *file_handlep = file_handle; if (0) { -err: free(fail_fh); +err: if (fd != -1) + (void)close(fd); + free(fail_fh); } fail_fs_unlock(&fail_fs->lock); diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index f691a76b1f2..d76e08067b5 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1987,6 +1987,16 @@ __conn_set_file_system( CONNECTION_API_CALL(conn, session, set_file_system, config, cfg); WT_UNUSED(cfg); + /* + * You can only configure a file system once, and attempting to do it + * again probably means the extension argument didn't have early-load + * set and we've already configured the default file system. + */ + if (conn->file_system != NULL) + WT_ERR_MSG(session, EPERM, + "filesystem already configured; custom filesystems should " + "enable \"early_load\" configuration"); + conn->file_system = file_system; err: API_END_RET(session, ret); diff --git a/test/csuite/wt3120_filesys/main.c b/test/csuite/wt3120_filesys/main.c index abf660db046..a4b830d6a70 100644 --- a/test/csuite/wt3120_filesys/main.c +++ b/test/csuite/wt3120_filesys/main.c @@ -36,6 +36,8 @@ * than that. 
*/ +#define WT_FAIL_FS_LIB "../../ext/test/fail_fs/.libs/libwiredtiger_fail_fs.so" + int main(int argc, char *argv[]) { @@ -43,7 +45,7 @@ main(int argc, char *argv[]) WT_CURSOR *cursor; WT_SESSION *session; char *kstr, *vstr; - char buf[100]; + char buf[1024]; opts = &_opts; memset(opts, 0, sizeof(*opts)); @@ -51,8 +53,7 @@ main(int argc, char *argv[]) testutil_make_work_dir(opts->home); snprintf(buf, sizeof(buf), - "create,extensions=" - "[\"../../ext/test/fail_fs/.libs/libwiredtiger_fail_fs.so\"]"); + "create,extensions=(" WT_FAIL_FS_LIB "=(early_load=true))"); testutil_check(wiredtiger_open(opts->home, NULL, buf, &opts->conn)); testutil_check( opts->conn->open_session(opts->conn, NULL, NULL, &session)); -- cgit v1.2.1 From d7dc59045b87a37f029c0046082489af557c7018 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Mon, 23 Jan 2017 17:49:50 -0500 Subject: WT-2790 Fix a text case false positive in test_sweep01. (#3263) --- test/suite/test_sweep01.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/suite/test_sweep01.py b/test/suite/test_sweep01.py index 71f8fcb180e..5559190caca 100644 --- a/test/suite/test_sweep01.py +++ b/test/suite/test_sweep01.py @@ -116,10 +116,15 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess): # Give slow machines time to process files. stat_cursor = self.session.open_cursor('statistics:', None, None) this_nfile = stat_cursor[stat.conn.file_open][2] + removed = stat_cursor[stat.conn.dh_sweep_remove][2] stat_cursor.close() self.pr("==== loop " + str(sleep)) self.pr("this_nfile " + str(this_nfile)) - if this_nfile == final_nfile: + self.pr("removed " + str(removed)) + # On slow machines there can be a lag where files get closed but + # the sweep server cannot yet remove the handles. So wait for the + # removed statistic to indicate forward progress too. 
+ if this_nfile == final_nfile and removed != remove1: break c.close() self.pr("Sweep loop took " + str(sleep)) -- cgit v1.2.1 From 75345eabdf5e54aa56fa51134fc53d5ae75aa7d8 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 23 Jan 2017 18:05:36 -0500 Subject: WT-3120 Add error handling to get_key/get_value in a test (#3262) --- test/csuite/wt3120_filesys/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/csuite/wt3120_filesys/main.c b/test/csuite/wt3120_filesys/main.c index a4b830d6a70..09dce624066 100644 --- a/test/csuite/wt3120_filesys/main.c +++ b/test/csuite/wt3120_filesys/main.c @@ -80,13 +80,13 @@ main(int argc, char *argv[]) testutil_check(session->open_cursor(session, opts->uri, NULL, NULL, &cursor)); testutil_check(cursor->next(cursor)); - cursor->get_key(cursor, &kstr); - cursor->get_value(cursor, &vstr); + testutil_check(cursor->get_key(cursor, &kstr)); + testutil_check(cursor->get_value(cursor, &vstr)); testutil_assert(strcmp(kstr, "a") == 0); testutil_assert(strcmp(vstr, "0") == 0); testutil_check(cursor->next(cursor)); - cursor->get_key(cursor, &kstr); - cursor->get_value(cursor, &vstr); + testutil_check(cursor->get_key(cursor, &kstr)); + testutil_check(cursor->get_value(cursor, &vstr)); testutil_assert(strcmp(kstr, "b") == 0); testutil_assert(strcmp(vstr, "1") == 0); testutil_assert(cursor->next(cursor) == WT_NOTFOUND); -- cgit v1.2.1 From 314675c75a777f18995cbac6303b3065c88f5e06 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Tue, 24 Jan 2017 01:30:09 -0500 Subject: WT-3137 Fix a hang in logging due to a race condition (#3223) --- src/include/log.h | 1 + src/log/log_slot.c | 199 +++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 156 insertions(+), 44 deletions(-) diff --git a/src/include/log.h b/src/include/log.h index d9fea892c68..82fcbf1be58 100644 --- a/src/include/log.h +++ b/src/include/log.h @@ -254,6 +254,7 @@ struct __wt_log { #define WT_SLOT_POOL 128 WT_LOGSLOT *active_slot; /* 
Active slot */ WT_LOGSLOT slot_pool[WT_SLOT_POOL]; /* Pool of all slots */ + int32_t pool_index; /* Index into slot pool */ size_t slot_buf_size; /* Buffer size for slots */ #ifdef HAVE_DIAGNOSTIC uint64_t write_calls; /* Calls to log_write */ diff --git a/src/log/log_slot.c b/src/log/log_slot.c index a29a34e5652..cb44cadcb70 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -8,6 +8,49 @@ #include "wt_internal.h" +#ifdef HAVE_DIAGNOSTIC +/* + * __log_slot_dump -- + * Dump the entire slot state. + */ +static void +__log_slot_dump(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_LOG *log; + WT_LOGSLOT *slot; + int32_t earliest, i; + + conn = S2C(session); + log = conn->log; + earliest = 0; + for (i = 0; i < WT_SLOT_POOL; i++) { + slot = &log->slot_pool[i]; + if (__wt_log_cmp(&slot->slot_release_lsn, + &log->slot_pool[earliest].slot_release_lsn) < 0) + earliest = i; + __wt_errx(session, "Slot %d:", i); + __wt_errx(session, " State: %" PRIx64 " Flags: %" PRIx32, + slot->slot_state, slot->flags); + __wt_errx(session, " Start LSN: %" PRIu32 "/%" PRIu32, + slot->slot_start_lsn.l.file, slot->slot_start_lsn.l.offset); + __wt_errx(session, " End LSN: %" PRIu32 "/%" PRIu32, + slot->slot_end_lsn.l.file, slot->slot_end_lsn.l.offset); + __wt_errx(session, " Release LSN: %" PRIu32 "/%" PRIu32, + slot->slot_release_lsn.l.file, + slot->slot_release_lsn.l.offset); + __wt_errx(session, " Offset: start: %" PRIu32 + " last:%" PRIu32, (uint32_t)slot->slot_start_offset, + (uint32_t)slot->slot_last_offset); + __wt_errx(session, " Unbuffered: %" PRId64 + " error: %" PRId32, slot->slot_unbuffered, + slot->slot_error); + } + __wt_errx(session, "Earliest slot: %d", earliest); + +} +#endif + /* * __wt_log_slot_activate -- * Initialize a slot to become active. @@ -21,7 +64,6 @@ __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) conn = S2C(session); log = conn->log; - slot->slot_state = 0; /* * !!! 
slot_release_lsn must be set outside this function because * this function may be called after a log file switch and the @@ -30,12 +72,19 @@ __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) * set for closing the file handle on a log file switch. The flags * are reset when the slot is freed. See log_slot_free. */ + slot->slot_unbuffered = 0; slot->slot_start_lsn = slot->slot_end_lsn = log->alloc_lsn; slot->slot_start_offset = log->alloc_lsn.l.offset; slot->slot_last_offset = log->alloc_lsn.l.offset; slot->slot_fh = log->log_fh; slot->slot_error = 0; - slot->slot_unbuffered = 0; + WT_DIAGNOSTIC_YIELD; + /* + * Set the slot state last. Other threads may have a stale pointer + * to this slot and could try to alter the state and other fields once + * they see the state cleared. + */ + WT_PUBLISH(slot->slot_state, 0); } /* @@ -50,6 +99,10 @@ __log_slot_close( WT_CONNECTION_IMPL *conn; WT_LOG *log; int64_t end_offset, new_state, old_state; +#ifdef HAVE_DIAGNOSTIC + struct timespec begin, now; + int count; +#endif WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT)); WT_ASSERT(session, releasep != NULL); @@ -101,9 +154,32 @@ retry: * that value. If the state is unbuffered, wait for the unbuffered * size to be set. 
*/ - while (WT_LOG_SLOT_UNBUFFERED_ISSET(old_state) && - slot->slot_unbuffered == 0) - __wt_yield(); +#ifdef HAVE_DIAGNOSTIC + count = 0; + __wt_epoch(session, &begin); +#endif + if (WT_LOG_SLOT_UNBUFFERED_ISSET(old_state)) { + while (slot->slot_unbuffered == 0) { + __wt_yield(); +#ifdef HAVE_DIAGNOSTIC + ++count; + if (count > WT_MILLION) { + __wt_epoch(session, &now); + if (WT_TIMEDIFF_SEC(now, begin) > 10) { + __wt_errx(session, "SLOT_CLOSE: Slot %" + PRIu32 " Timeout unbuffered, state 0x%" + PRIx64 " unbuffered %" PRIu64, + (uint32_t)(slot - &log->slot_pool[0]), + slot->slot_state, + slot->slot_unbuffered); + __log_slot_dump(session); + __wt_abort(session); + } + count = 0; + } +#endif + } + } end_offset = WT_LOG_SLOT_JOINED_BUFFERED(old_state) + slot->slot_unbuffered; @@ -218,7 +294,11 @@ __wt_log_slot_new(WT_SESSION_IMPL *session) WT_CONNECTION_IMPL *conn; WT_LOG *log; WT_LOGSLOT *slot; - int32_t i; + int32_t i, pool_i; +#ifdef HAVE_DIAGNOSTIC + struct timespec begin, now; + int count; +#endif WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT)); conn = S2C(session); @@ -232,16 +312,22 @@ __wt_log_slot_new(WT_SESSION_IMPL *session) WT_LOG_SLOT_OPEN(slot->slot_state)) return (0); +#ifdef HAVE_DIAGNOSTIC + count = 0; + __wt_epoch(session, &begin); +#endif /* * Keep trying until we can find a free slot. */ for (;;) { /* - * For now just restart at 0. We could use log->pool_index - * if that is inefficient. + * Rotate among the slots to lessen collisions. 
*/ - for (i = 0; i < WT_SLOT_POOL; i++) { - slot = &log->slot_pool[i]; + for (i = 0, pool_i = log->pool_index; i < WT_SLOT_POOL; + i++, pool_i++) { + if (pool_i >= WT_SLOT_POOL) + pool_i = 0; + slot = &log->slot_pool[pool_i]; if (slot->slot_state == WT_LOG_SLOT_FREE) { /* * Acquire our starting position in the @@ -256,6 +342,7 @@ __wt_log_slot_new(WT_SESSION_IMPL *session) WT_STAT_CONN_INCR(session, log_slot_transitions); log->active_slot = slot; + log->pool_index = pool_i; return (0); } } @@ -264,6 +351,19 @@ __wt_log_slot_new(WT_SESSION_IMPL *session) */ __wt_cond_auto_signal(session, conn->log_wrlsn_cond); __wt_yield(); +#ifdef HAVE_DIAGNOSTIC + ++count; + if (count > WT_MILLION) { + __wt_epoch(session, &now); + if (WT_TIMEDIFF_SEC(now, begin) > 10) { + __wt_errx(session, + "SLOT_NEW: Timeout free slot"); + __log_slot_dump(session); + __wt_abort(session); + } + count = 0; + } +#endif } /* NOTREACHED */ } @@ -311,10 +411,13 @@ __wt_log_slot_init(WT_SESSION_IMPL *session) /* * We cannot initialize the release LSN in the activate function * because that function can be called after a log file switch. + * The release LSN is usually the same as the slot_start_lsn except + * around a log file switch. */ slot->slot_release_lsn = log->alloc_lsn; __wt_log_slot_activate(session, slot); log->active_slot = slot; + log->pool_index = 0; if (0) { err: while (--i >= 0) @@ -370,53 +473,62 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, WT_LOGSLOT *slot; int64_t flag_state, new_state, old_state, released; int32_t join_offset, new_join; -#ifdef HAVE_DIAGNOSTIC - bool unbuf_force; -#endif + bool unbuffered, yld; conn = S2C(session); log = conn->log; WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT)); + WT_ASSERT(session, mysize != 0); /* * There should almost always be a slot open. 
*/ + unbuffered = false; #ifdef HAVE_DIAGNOSTIC - unbuf_force = (++log->write_calls % WT_THOUSAND) == 0; + yld = (++log->write_calls % 7) == 0; + if ((log->write_calls % WT_THOUSAND) == 0 || + mysize > WT_LOG_SLOT_BUF_MAX) { +#else + yld = false; + if (mysize > WT_LOG_SLOT_BUF_MAX) { #endif + unbuffered = true; + F_SET(myslot, WT_MYSLOT_UNBUFFERED); + } for (;;) { WT_BARRIER(); slot = log->active_slot; old_state = slot->slot_state; - /* - * Try to join our size into the existing size and - * atomically write it back into the state. - */ - flag_state = WT_LOG_SLOT_FLAGS(old_state); - released = WT_LOG_SLOT_RELEASED(old_state); - join_offset = WT_LOG_SLOT_JOINED(old_state); -#ifdef HAVE_DIAGNOSTIC - if (unbuf_force || mysize > WT_LOG_SLOT_BUF_MAX) { -#else - if (mysize > WT_LOG_SLOT_BUF_MAX) { -#endif - new_join = join_offset + WT_LOG_SLOT_UNBUFFERED; - F_SET(myslot, WT_MYSLOT_UNBUFFERED); - myslot->slot = slot; - } else - new_join = join_offset + (int32_t)mysize; - new_state = (int64_t)WT_LOG_SLOT_JOIN_REL( - (int64_t)new_join, (int64_t)released, (int64_t)flag_state); - - /* - * Check if the slot is open for joining and we are able to - * swap in our size into the state. - */ - if (WT_LOG_SLOT_OPEN(old_state) && - __wt_atomic_casiv64( - &slot->slot_state, old_state, new_state)) - break; + if (WT_LOG_SLOT_OPEN(old_state)) { + /* + * Try to join our size into the existing size and + * atomically write it back into the state. + */ + flag_state = WT_LOG_SLOT_FLAGS(old_state); + released = WT_LOG_SLOT_RELEASED(old_state); + join_offset = WT_LOG_SLOT_JOINED(old_state); + if (unbuffered) + new_join = join_offset + WT_LOG_SLOT_UNBUFFERED; + else + new_join = join_offset + (int32_t)mysize; + new_state = (int64_t)WT_LOG_SLOT_JOIN_REL( + (int64_t)new_join, (int64_t)released, + (int64_t)flag_state); + + /* + * Braces used due to potential empty body warning. + */ + if (yld) { + WT_DIAGNOSTIC_YIELD; + } + /* + * Attempt to swap our size into the state. 
+ */ + if (__wt_atomic_casiv64( + &slot->slot_state, old_state, new_state)) + break; + } /* * The slot is no longer open or we lost the race to * update it. Yield and try again. @@ -428,8 +540,7 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, * We joined this slot. Fill in our information to return to * the caller. */ - if (mysize != 0) - WT_STAT_CONN_INCR(session, log_slot_joins); + WT_STAT_CONN_INCR(session, log_slot_joins); if (LF_ISSET(WT_LOG_DSYNC | WT_LOG_FSYNC)) F_SET(slot, WT_SLOT_SYNC_DIR); if (LF_ISSET(WT_LOG_FLUSH)) -- cgit v1.2.1 From 3695a0dd4dbb1612518ed3f68a2e3c6e7550e0ed Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 24 Jan 2017 09:09:24 -0500 Subject: WT-3137 Fix a hang in logging due to a race condition (#3266) Lint: Don't print int32_t's with %d. WT_LOGSLOT.slot_error is an int, not an int32_t. Don't print off_t's as 32-bits, use the maximum size unsigned object. --- src/include/log.h | 2 +- src/log/log_slot.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/include/log.h b/src/include/log.h index 82fcbf1be58..a6be3582b4d 100644 --- a/src/include/log.h +++ b/src/include/log.h @@ -163,7 +163,7 @@ struct __wt_logslot { WT_CACHE_LINE_PAD_BEGIN volatile int64_t slot_state; /* Slot state */ int64_t slot_unbuffered; /* Unbuffered data in this slot */ - int32_t slot_error; /* Error value */ + int slot_error; /* Error value */ wt_off_t slot_start_offset; /* Starting file offset */ wt_off_t slot_last_offset; /* Last record offset */ WT_LSN slot_release_lsn; /* Slot release LSN */ diff --git a/src/log/log_slot.c b/src/log/log_slot.c index cb44cadcb70..d70c0d689be 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -19,7 +19,7 @@ __log_slot_dump(WT_SESSION_IMPL *session) WT_CONNECTION_IMPL *conn; WT_LOG *log; WT_LOGSLOT *slot; - int32_t earliest, i; + int earliest, i; conn = S2C(session); log = conn->log; @@ -39,9 +39,9 @@ __log_slot_dump(WT_SESSION_IMPL *session) __wt_errx(session, " Release 
LSN: %" PRIu32 "/%" PRIu32, slot->slot_release_lsn.l.file, slot->slot_release_lsn.l.offset); - __wt_errx(session, " Offset: start: %" PRIu32 - " last:%" PRIu32, (uint32_t)slot->slot_start_offset, - (uint32_t)slot->slot_last_offset); + __wt_errx(session, " Offset: start: %" PRIuMAX + " last:%" PRIuMAX, (uintmax_t)slot->slot_start_offset, + (uintmax_t)slot->slot_last_offset); __wt_errx(session, " Unbuffered: %" PRId64 " error: %" PRId32, slot->slot_unbuffered, slot->slot_error); -- cgit v1.2.1 From d5ae763f990af5ba5522b07c18b9b37fdaae0e88 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 24 Jan 2017 20:28:32 -0500 Subject: WT-3113 Add a verbose mode to dump the cache when eviction is stuck. (#3234) --- dist/api_data.py | 1 + dist/flags.py | 1 + src/config/config_def.c | 60 +++---- src/conn/conn_api.c | 1 + src/evict/evict_lru.c | 370 ++++++++++++++++-------------------------- src/include/cache.h | 2 +- src/include/extern.h | 3 +- src/include/flags.h | 45 ++--- src/include/wiredtiger.in | 26 +-- src/txn/txn.c | 95 +++++++++++ test/suite/test_reconfig04.py | 2 - 11 files changed, 305 insertions(+), 301 deletions(-) diff --git a/dist/api_data.py b/dist/api_data.py index 324d1e4f281..b1332320a7c 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -524,6 +524,7 @@ connection_runtime_config = [ 'checkpoint', 'compact', 'evict', + 'evict_stuck', 'evictserver', 'fileops', 'handleops', diff --git a/dist/flags.py b/dist/flags.py index 70e18712839..55ce233e60d 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -64,6 +64,7 @@ flags = { 'VERB_COMPACT', 'VERB_EVICT', 'VERB_EVICTSERVER', + 'VERB_EVICT_STUCK', 'VERB_FILEOPS', 'VERB_HANDLEOPS', 'VERB_LOG', diff --git a/src/config/config_def.c b/src/config/config_def.c index 6a93c1d05e2..b11a8d63fdb 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -147,12 +147,12 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs, 5 }, { 
"verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," - "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," - "\"read\",\"rebalance\",\"reconcile\",\"recovery\"," - "\"recovery_progress\",\"salvage\",\"shared_cache\",\"split\"," - "\"temporary\",\"thread_group\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," + "\"handleops\",\"log\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"transaction\"," + "\"verify\",\"version\",\"write\"]", NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -750,12 +750,12 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { { "use_environment_priv", "boolean", NULL, NULL, NULL, 0 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," - "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," - "\"read\",\"rebalance\",\"reconcile\",\"recovery\"," - "\"recovery_progress\",\"salvage\",\"shared_cache\",\"split\"," - "\"temporary\",\"thread_group\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," + "\"handleops\",\"log\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"transaction\"," + "\"verify\",\"version\",\"write\"]", NULL, 0 }, { "write_through", "list", NULL, "choices=[\"data\",\"log\"]", @@ -837,12 +837,12 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { { "use_environment_priv", "boolean", NULL, NULL, NULL, 0 }, { "verbose", "list", 
NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," - "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," - "\"read\",\"rebalance\",\"reconcile\",\"recovery\"," - "\"recovery_progress\",\"salvage\",\"shared_cache\",\"split\"," - "\"temporary\",\"thread_group\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," + "\"handleops\",\"log\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"transaction\"," + "\"verify\",\"version\",\"write\"]", NULL, 0 }, { "version", "string", NULL, NULL, NULL, 0 }, { "write_through", "list", @@ -919,12 +919,12 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { confchk_wiredtiger_open_transaction_sync_subconfigs, 2 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," - "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," - "\"read\",\"rebalance\",\"reconcile\",\"recovery\"," - "\"recovery_progress\",\"salvage\",\"shared_cache\",\"split\"," - "\"temporary\",\"thread_group\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," + "\"handleops\",\"log\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"transaction\"," + "\"verify\",\"version\",\"write\"]", NULL, 0 }, { "version", "string", NULL, NULL, NULL, 0 }, { "write_through", "list", @@ -1001,12 +1001,12 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { 
confchk_wiredtiger_open_transaction_sync_subconfigs, 2 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," - "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," - "\"read\",\"rebalance\",\"reconcile\",\"recovery\"," - "\"recovery_progress\",\"salvage\",\"shared_cache\",\"split\"," - "\"temporary\",\"thread_group\",\"transaction\",\"verify\"," - "\"version\",\"write\"]", + "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," + "\"handleops\",\"log\",\"lsm\",\"lsm_manager\",\"metadata\"," + "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," + "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," + "\"split\",\"temporary\",\"thread_group\",\"transaction\"," + "\"verify\",\"version\",\"write\"]", NULL, 0 }, { "write_through", "list", NULL, "choices=[\"data\",\"log\"]", diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index d76e08067b5..124250a7a7d 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1798,6 +1798,7 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) { "checkpoint", WT_VERB_CHECKPOINT }, { "compact", WT_VERB_COMPACT }, { "evict", WT_VERB_EVICT }, + { "evict_stuck", WT_VERB_EVICT_STUCK }, { "evictserver", WT_VERB_EVICTSERVER }, { "fileops", WT_VERB_FILEOPS }, { "handleops", WT_VERB_HANDLEOPS }, diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 9b969de9a9e..0cf746f84eb 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -285,7 +285,7 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) conn = S2C(session); cache = conn->cache; -#ifdef HAVE_DIAGNOSTIC +#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE) /* * Ensure the cache stuck timer is initialized when starting eviction. 
*/ @@ -353,12 +353,12 @@ err: WT_PANIC_MSG(session, ret, "cache eviction thread error"); static int __evict_server(WT_SESSION_IMPL *session, bool *did_work) { +#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE) + struct timespec now; +#endif WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; -#ifdef HAVE_DIAGNOSTIC - struct timespec now; -#endif uint64_t orig_pages_evicted; conn = S2C(session); @@ -395,11 +395,15 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work) cache->pages_evicted = 0; } else if (cache->pages_evicted != cache->pages_evict) { cache->pages_evicted = cache->pages_evict; -#ifdef HAVE_DIAGNOSTIC +#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE) __wt_epoch(session, &cache->stuck_ts); } else if (!F_ISSET(conn, WT_CONN_IN_MEMORY)) { /* - * After being stuck for 5 minutes, give up. + * If we're stuck for 5 minutes in diagnostic mode, or the + * verbose evict_stuck flag is configured, log the cache + * and transaction state. + * + * If we're stuck for 5 minutes in diagnostic mode, give up. * * We don't do this check for in-memory workloads because * application threads are not blocked by the cache being full. @@ -408,11 +412,22 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work) */ __wt_epoch(session, &now); if (WT_TIMEDIFF_SEC(now, cache->stuck_ts) > 300) { - ret = ETIMEDOUT; - __wt_err(session, ret, +#if defined(HAVE_DIAGNOSTIC) + __wt_err(session, ETIMEDOUT, "Cache stuck for too long, giving up"); - WT_TRET(__wt_dump_stuck_info(session, NULL)); + ret = ETIMEDOUT; + WT_TRET(__wt_verbose_dump_txn(session)); + WT_TRET(__wt_verbose_dump_cache(session)); return (ret); +#elif defined(HAVE_VERBOSE) + if (WT_VERBOSE_ISSET(session, WT_VERB_EVICT_STUCK)) { + WT_RET(__wt_verbose_dump_txn(session)); + WT_RET(__wt_verbose_dump_cache(session)); + + /* Reset the timer. 
*/ + __wt_epoch(session, &cache->stuck_ts); + } +#endif } #endif } @@ -2184,226 +2199,138 @@ __wt_evict_priority_clear(WT_SESSION_IMPL *session) S2BT(session)->evict_priority = 0; } -#ifdef HAVE_DIAGNOSTIC +#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE) /* - * __dump_txn_state -- - * Output debugging information about the global transaction state. + * __verbose_dump_cache_single -- + * Output diagnostic information about a single file in the cache. */ static int -__dump_txn_state(WT_SESSION_IMPL *session, FILE *fp) +__verbose_dump_cache_single(WT_SESSION_IMPL *session, + uint64_t *total_bytesp, uint64_t *total_dirty_bytesp) { - WT_CONNECTION_IMPL *conn; - WT_TXN_GLOBAL *txn_global; - WT_TXN *txn; - WT_TXN_STATE *s; - const char *iso_tag; - uint64_t id; - uint32_t i, session_cnt; - - conn = S2C(session); - txn_global = &conn->txn_global; - WT_ORDERED_READ(session_cnt, conn->session_cnt); - - /* Note: odd string concatenation avoids spelling errors. */ - if (fprintf(fp, "==========\n" "transaction state dump\n") < 0) - return (EIO); - - if (fprintf(fp, - "current ID: %" PRIu64 "\n" - "last running ID: %" PRIu64 "\n" - "oldest ID: %" PRIu64 "\n" - "oldest named snapshot ID: %" PRIu64 "\n", - txn_global->current, txn_global->last_running, - txn_global->oldest_id, txn_global->nsnap_oldest_id) < 0) - return (EIO); - - if (fprintf(fp, - "checkpoint running? %s\n" - "checkpoint generation: %" PRIu64 "\n" - "checkpoint pinned ID: %" PRIu64 "\n" - "checkpoint txn ID: %" PRIu64 "\n" - "session count: %" PRIu32 "\n", - txn_global->checkpoint_running ? "yes" : "no", - txn_global->checkpoint_gen, - txn_global->checkpoint_pinned, - txn_global->checkpoint_txnid, - session_cnt) < 0) - return (EIO); - - if (fprintf(fp, "Dumping transaction state of active sessions\n") < 0) - return (EIO); - - /* - * Walk each session transaction state and dump information. 
Accessing - * the content of session handles is not thread safe, so some - * information may change while traversing if other threads are active - * at the same time, which is OK since this is diagnostic code. - */ - for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) { - /* Skip sessions with no active transaction */ - if ((id = s->id) == WT_TXN_NONE && s->pinned_id == WT_TXN_NONE) - continue; + WT_DATA_HANDLE *dhandle; + WT_PAGE *page; + WT_REF *next_walk; + size_t size; + uint64_t intl_bytes, intl_bytes_max, intl_dirty_bytes; + uint64_t intl_dirty_bytes_max, intl_dirty_pages, intl_pages; + uint64_t leaf_bytes, leaf_bytes_max, leaf_dirty_bytes; + uint64_t leaf_dirty_bytes_max, leaf_dirty_pages, leaf_pages; - txn = &conn->sessions[i].txn; - iso_tag = "INVALID"; - switch (txn->isolation) { - case WT_ISO_READ_COMMITTED: - iso_tag = "WT_ISO_READ_COMMITTED"; - break; - case WT_ISO_READ_UNCOMMITTED: - iso_tag = "WT_ISO_READ_UNCOMMITTED"; - break; - case WT_ISO_SNAPSHOT: - iso_tag = "WT_ISO_SNAPSHOT"; - break; + intl_bytes = intl_bytes_max = intl_dirty_bytes = 0; + intl_dirty_bytes_max = intl_dirty_pages = intl_pages = 0; + leaf_bytes = leaf_bytes_max = leaf_dirty_bytes = 0; + leaf_dirty_bytes_max = leaf_dirty_pages = leaf_pages = 0; + + next_walk = NULL; + while (__wt_tree_walk(session, &next_walk, + WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_WAIT) == 0 && + next_walk != NULL) { + page = next_walk->page; + size = page->memory_footprint; + + if (WT_PAGE_IS_INTERNAL(page)) { + ++intl_pages; + intl_bytes += size; + intl_bytes_max = WT_MAX(intl_bytes_max, size); + if (__wt_page_is_modified(page)) { + ++intl_dirty_pages; + intl_dirty_bytes += size; + intl_dirty_bytes_max = + WT_MAX(intl_dirty_bytes_max, size); + } + } else { + ++leaf_pages; + leaf_bytes += size; + leaf_bytes_max = WT_MAX(leaf_bytes_max, size); + if (__wt_page_is_modified(page)) { + ++leaf_dirty_pages; + leaf_dirty_bytes += size; + leaf_dirty_bytes_max = + WT_MAX(leaf_dirty_bytes_max, size); + 
} } - - if (fprintf(fp, - "ID: %6" PRIu64 - ", mod count: %u" - ", pinned ID: %" PRIu64 - ", snap min: %" PRIu64 - ", snap max: %" PRIu64 - ", metadata pinned ID: %" PRIu64 - ", flags: 0x%08" PRIx32 - ", name: %s" - ", isolation: %s" "\n", - id, - txn->mod_count, - s->pinned_id, - txn->snap_min, - txn->snap_max, - s->metadata_pinned, - txn->flags, - conn->sessions[i].name == NULL ? - "EMPTY" : conn->sessions[i].name, - iso_tag) < 0) - return (EIO); } + dhandle = session->dhandle; + if (dhandle->checkpoint == NULL) + WT_RET(__wt_msg(session, "%s():", dhandle->name)); + else + WT_RET(__wt_msg(session, "%s(checkpoint=%s):", + dhandle->name, dhandle->checkpoint)); + if (intl_pages != 0) + WT_RET(__wt_msg(session, + "internal: " + "%" PRIu64 " pages, " + "%" PRIu64 "MB, " + "%" PRIu64 "/%" PRIu64 " clean/dirty pages, " + "%" PRIu64 "/%" PRIu64 " clean/dirty MB, " + "%" PRIu64 "MB max page, " + "%" PRIu64 "MB max dirty page", + intl_pages, + intl_bytes / WT_MEGABYTE, + intl_pages - intl_dirty_pages, + intl_dirty_pages, + (intl_bytes - intl_dirty_bytes) / WT_MEGABYTE, + intl_dirty_bytes / WT_MEGABYTE, + intl_bytes_max / WT_MEGABYTE, + intl_dirty_bytes_max / WT_MEGABYTE)); + if (leaf_pages != 0) + WT_RET(__wt_msg(session, + "leaf: " + "%" PRIu64 " pages, " + "%" PRIu64 "MB, " + "%" PRIu64 "/%" PRIu64 " clean/dirty pages, " + "%" PRIu64 "/%" PRIu64 " clean/dirty MB, " + "%" PRIu64 "MB max page, " + "%" PRIu64 "MB max dirty page", + leaf_pages, + leaf_bytes / WT_MEGABYTE, + leaf_pages - leaf_dirty_pages, + leaf_dirty_pages, + (leaf_bytes - leaf_dirty_bytes) / WT_MEGABYTE, + leaf_dirty_bytes / WT_MEGABYTE, + leaf_bytes_max / WT_MEGABYTE, + leaf_dirty_bytes_max / WT_MEGABYTE)); + + *total_bytesp += intl_bytes + leaf_bytes; + *total_dirty_bytesp += intl_dirty_bytes + leaf_dirty_bytes; + return (0); } /* - * __dump_cache -- - * Output debugging information about the size of the files in cache. + * __wt_verbose_dump_cache -- + * Output diagnostic information about the cache. 
*/ -static int -__dump_cache(WT_SESSION_IMPL *session, FILE *fp) +int +__wt_verbose_dump_cache(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; - WT_DATA_HANDLE *dhandle, *saved_dhandle; - WT_PAGE *page; - WT_REF *next_walk; - uint64_t intl_bytes, intl_bytes_max, intl_dirty_bytes; - uint64_t intl_dirty_bytes_max, intl_dirty_pages, intl_pages; - uint64_t leaf_bytes, leaf_bytes_max, leaf_dirty_bytes; - uint64_t leaf_dirty_bytes_max, leaf_dirty_pages, leaf_pages; + WT_DATA_HANDLE *dhandle; + WT_DECL_RET; uint64_t total_bytes, total_dirty_bytes; - size_t size; conn = S2C(session); total_bytes = total_dirty_bytes = 0; - /* Note: odd string concatenation avoids spelling errors. */ - if (fprintf(fp, "==========\n" "cache dump\n") < 0) - return (EIO); + WT_RET(__wt_msg(session, "%s", WT_DIVIDER)); + WT_RET(__wt_msg(session, "cache dump")); - saved_dhandle = session->dhandle; + __wt_spin_lock(session, &conn->dhandle_lock); TAILQ_FOREACH(dhandle, &conn->dhqh, q) { if (!WT_PREFIX_MATCH(dhandle->name, "file:") || !F_ISSET(dhandle, WT_DHANDLE_OPEN)) continue; - intl_bytes = intl_bytes_max = intl_dirty_bytes = 0; - intl_dirty_bytes_max = intl_dirty_pages = intl_pages = 0; - leaf_bytes = leaf_bytes_max = leaf_dirty_bytes = 0; - leaf_dirty_bytes_max = leaf_dirty_pages = leaf_pages = 0; - - next_walk = NULL; - session->dhandle = dhandle; - while (__wt_tree_walk(session, &next_walk, - WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_WAIT) == 0 && - next_walk != NULL) { - page = next_walk->page; - size = page->memory_footprint; - - if (WT_PAGE_IS_INTERNAL(page)) { - ++intl_pages; - intl_bytes += size; - intl_bytes_max = WT_MAX(intl_bytes_max, size); - if (__wt_page_is_modified(page)) { - ++intl_dirty_pages; - intl_dirty_bytes += size; - intl_dirty_bytes_max = - WT_MAX(intl_dirty_bytes_max, size); - } - } else { - ++leaf_pages; - leaf_bytes += size; - leaf_bytes_max = WT_MAX(leaf_bytes_max, size); - if (__wt_page_is_modified(page)) { - ++leaf_dirty_pages; - leaf_dirty_bytes += size; 
- leaf_dirty_bytes_max = - WT_MAX(leaf_dirty_bytes_max, size); - } - } - } - session->dhandle = NULL; - - if (dhandle->checkpoint == NULL) { - if (fprintf(fp, - "%s(): \n", dhandle->name) < 0) - return (EIO); - } else { - if (fprintf(fp, "%s(checkpoint=%s): \n", - dhandle->name, dhandle->checkpoint) < 0) - return (EIO); - } - if (intl_pages != 0) { - if (fprintf(fp, - "\t" "internal: " - "%" PRIu64 " pages, " - "%" PRIu64 "MB, " - "%" PRIu64 "/%" PRIu64 " clean/dirty pages, " - "%" PRIu64 "/%" PRIu64 " clean/dirty MB, " - "%" PRIu64 "MB max page, " - "%" PRIu64 "MB max dirty page\n", - intl_pages, - intl_bytes >> 20, - intl_pages - intl_dirty_pages, - intl_dirty_pages, - (intl_bytes - intl_dirty_bytes) >> 20, - intl_dirty_bytes >> 20, - intl_bytes_max >> 20, - intl_dirty_bytes_max >> 20) < 0) - return (EIO); - } - if (leaf_pages != 0) { - if (fprintf(fp, - "\t" "leaf: " - "%" PRIu64 " pages, " - "%" PRIu64 "MB, " - "%" PRIu64 "/%" PRIu64 " clean/dirty pages, " - "%" PRIu64 "/%" PRIu64 " clean/dirty MB, " - "%" PRIu64 "MB max page, " - "%" PRIu64 "MB max dirty page\n", - leaf_pages, - leaf_bytes >> 20, - leaf_pages - leaf_dirty_pages, - leaf_dirty_pages, - (leaf_bytes - leaf_dirty_bytes) >> 20, - leaf_dirty_bytes >> 20, - leaf_bytes_max >> 20, - leaf_dirty_bytes_max >> 20) < 0) - return (EIO); - } - - total_bytes += intl_bytes + leaf_bytes; - total_dirty_bytes += intl_dirty_bytes + leaf_dirty_bytes; + WT_WITH_DHANDLE(session, dhandle, + ret = __verbose_dump_cache_single( + session, &total_bytes, &total_dirty_bytes)); + if (ret != 0) + break; } - session->dhandle = saved_dhandle; + __wt_spin_unlock(session, &conn->dhandle_lock); + WT_RET(ret); /* * Apply the overhead percentage so our total bytes are comparable with @@ -2411,39 +2338,16 @@ __dump_cache(WT_SESSION_IMPL *session, FILE *fp) */ total_bytes = __wt_cache_bytes_plus_overhead(conn->cache, total_bytes); - if (fprintf(fp, + WT_RET(__wt_msg(session, "cache dump: " - "total found: %" PRIu64 "MB vs tracked inuse 
%" PRIu64 "MB\n" - "total dirty bytes: %" PRIu64 "MB\n", - total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20, - total_dirty_bytes >> 20) < 0) - return (EIO); - if (fprintf(fp, "==========\n") < 0) - return (EIO); + "total found: %" PRIu64 "MB vs tracked inuse %" PRIu64 "MB", + total_bytes / WT_MEGABYTE, + __wt_cache_bytes_inuse(conn->cache) / WT_MEGABYTE)); + WT_RET(__wt_msg(session, + "total dirty bytes: %" PRIu64 "MB", + total_dirty_bytes / WT_MEGABYTE)); + WT_RET(__wt_msg(session, "%s", WT_DIVIDER)); return (0); } - -/* - * __wt_dump_stuck_info -- - * Dump debugging information to a file (default stderr) about the state - * of WiredTiger when we have determined that the cache is stuck full. - */ -int -__wt_dump_stuck_info(WT_SESSION_IMPL *session, const char *ofile) -{ - FILE *fp; - WT_DECL_RET; - - if (ofile == NULL) - fp = stderr; - else if ((fp = fopen(ofile, "w")) == NULL) - return (EIO); - - WT_ERR(__dump_txn_state(session, fp)); - WT_ERR(__dump_cache(session, fp)); -err: if (ofile != NULL && fclose(fp) != 0) - return (EIO); - return (ret); -} #endif diff --git a/src/include/cache.h b/src/include/cache.h index 70f6169200d..abd5a1901f7 100644 --- a/src/include/cache.h +++ b/src/include/cache.h @@ -83,7 +83,7 @@ struct __wt_cache { uint64_t worker_evicts; /* Pages evicted by worker threads */ uint64_t evict_max_page_size; /* Largest page seen at eviction */ -#ifdef HAVE_DIAGNOSTIC +#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE) struct timespec stuck_ts; /* Stuck timestamp */ #endif diff --git a/src/include/extern.h b/src/include/extern.h index 16b3c916b24..88fb8823930 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -352,7 +352,7 @@ extern int __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int extern bool __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_dump_stuck_info(WT_SESSION_IMPL *session, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_verbose_dump_cache(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -741,6 +741,7 @@ extern void __wt_txn_stats_update(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATT extern void __wt_txn_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_txn_global_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_verbose_dump_txn(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[], bool waiting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/flags.h b/src/include/flags.h index 2f0c207078a..0b92a12c686 100644 --- a/src/include/flags.h +++ b/src/include/flags.h @@ -90,28 +90,29 @@ #define WT_VERB_COMPACT 0x00000008 #define WT_VERB_EVICT 0x00000010 #define WT_VERB_EVICTSERVER 0x00000020 -#define WT_VERB_FILEOPS 0x00000040 -#define WT_VERB_HANDLEOPS 0x00000080 -#define WT_VERB_LOG 0x00000100 -#define WT_VERB_LSM 0x00000200 -#define WT_VERB_LSM_MANAGER 0x00000400 -#define WT_VERB_METADATA 0x00000800 -#define WT_VERB_MUTEX 0x00001000 -#define WT_VERB_OVERFLOW 0x00002000 -#define WT_VERB_READ 0x00004000 -#define WT_VERB_REBALANCE 0x00008000 -#define WT_VERB_RECONCILE 0x00010000 -#define WT_VERB_RECOVERY 0x00020000 -#define WT_VERB_RECOVERY_PROGRESS 0x00040000 -#define WT_VERB_SALVAGE 0x00080000 -#define WT_VERB_SHARED_CACHE 0x00100000 -#define WT_VERB_SPLIT 0x00200000 -#define WT_VERB_TEMPORARY 0x00400000 -#define WT_VERB_THREAD_GROUP 0x00800000 -#define WT_VERB_TRANSACTION 0x01000000 -#define WT_VERB_VERIFY 0x02000000 -#define WT_VERB_VERSION 0x04000000 -#define WT_VERB_WRITE 0x08000000 +#define WT_VERB_EVICT_STUCK 0x00000040 +#define WT_VERB_FILEOPS 0x00000080 +#define WT_VERB_HANDLEOPS 0x00000100 +#define WT_VERB_LOG 0x00000200 +#define WT_VERB_LSM 0x00000400 +#define WT_VERB_LSM_MANAGER 0x00000800 +#define WT_VERB_METADATA 0x00001000 +#define WT_VERB_MUTEX 0x00002000 +#define WT_VERB_OVERFLOW 0x00004000 +#define WT_VERB_READ 0x00008000 +#define WT_VERB_REBALANCE 0x00010000 +#define WT_VERB_RECONCILE 0x00020000 +#define WT_VERB_RECOVERY 0x00040000 +#define WT_VERB_RECOVERY_PROGRESS 0x00080000 +#define WT_VERB_SALVAGE 0x00100000 +#define WT_VERB_SHARED_CACHE 0x00200000 +#define WT_VERB_SPLIT 0x00400000 +#define WT_VERB_TEMPORARY 0x00800000 +#define 
WT_VERB_THREAD_GROUP 0x01000000 +#define WT_VERB_TRANSACTION 0x02000000 +#define WT_VERB_VERIFY 0x04000000 +#define WT_VERB_VERSION 0x08000000 +#define WT_VERB_WRITE 0x10000000 #define WT_VISIBILITY_ERR 0x00000080 /* * flags section: END diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 90989cc679d..03bff7cd04f 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1982,12 +1982,13 @@ struct __wt_connection { * as a list\, such as "verbose=[evictserver\,read]"., a * list\, with values chosen from the following options: \c "api"\, \c * "block"\, \c "checkpoint"\, \c "compact"\, \c "evict"\, \c - * "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c "lsm"\, - * \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c - * "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c - * "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, - * \c "temporary"\, \c "thread_group"\, \c "transaction"\, \c "verify"\, - * \c "version"\, \c "write"; default empty.} + * "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c "handleops"\, \c + * "log"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c + * "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c + * "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c + * "shared_cache"\, \c "split"\, \c "temporary"\, \c "thread_group"\, \c + * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default + * empty.} * @configend * @errors */ @@ -2513,12 +2514,13 @@ struct __wt_connection { * WiredTiger is configured with --enable-verbose. 
Options are given as a * list\, such as "verbose=[evictserver\,read]"., a list\, with * values chosen from the following options: \c "api"\, \c "block"\, \c - * "checkpoint"\, \c "compact"\, \c "evict"\, \c "evictserver"\, \c "fileops"\, - * \c "handleops"\, \c "log"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c - * "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c - * "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c - * "split"\, \c "temporary"\, \c "thread_group"\, \c "transaction"\, \c - * "verify"\, \c "version"\, \c "write"; default empty.} + * "checkpoint"\, \c "compact"\, \c "evict"\, \c "evict_stuck"\, \c + * "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c "lsm"\, \c + * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c "read"\, \c + * "rebalance"\, \c "reconcile"\, \c "recovery"\, \c "recovery_progress"\, \c + * "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c + * "thread_group"\, \c "transaction"\, \c "verify"\, \c "version"\, \c "write"; + * default empty.} * @config{write_through, Use \c FILE_FLAG_WRITE_THROUGH on Windows to write to * files. Ignored on non-Windows systems. Options are given as a list\, such * as "write_through=[data]". Configuring \c write_through requires diff --git a/src/txn/txn.c b/src/txn/txn.c index 660d37b17d5..e5e59c2b901 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -803,3 +803,98 @@ __wt_txn_global_destroy(WT_SESSION_IMPL *session) __wt_rwlock_destroy(session, &txn_global->nsnap_rwlock); __wt_free(session, txn_global->states); } + +#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE) +/* + * __wt_verbose_dump_txn -- + * Output diagnostic information about the global transaction state. 
+ */ +int +__wt_verbose_dump_txn(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_TXN_GLOBAL *txn_global; + WT_TXN *txn; + WT_TXN_STATE *s; + const char *iso_tag; + uint64_t id; + uint32_t i, session_cnt; + + conn = S2C(session); + txn_global = &conn->txn_global; + + WT_RET(__wt_msg(session, "%s", WT_DIVIDER)); + WT_RET(__wt_msg(session, "transaction state dump")); + + WT_RET(__wt_msg(session, "current ID: %" PRIu64, txn_global->current)); + WT_RET(__wt_msg(session, + "last running ID: %" PRIu64, txn_global->last_running)); + WT_RET(__wt_msg(session, "oldest ID: %" PRIu64, txn_global->oldest_id)); + WT_RET(__wt_msg(session, + "oldest named snapshot ID: %" PRIu64, txn_global->nsnap_oldest_id)); + + WT_RET(__wt_msg(session, "checkpoint running? %s", + txn_global->checkpoint_running ? "yes" : "no")); + WT_RET(__wt_msg(session, + "checkpoint generation: %" PRIu64, txn_global->checkpoint_gen)); + WT_RET(__wt_msg(session, + "checkpoint pinned ID: %" PRIu64, txn_global->checkpoint_pinned)); + WT_RET(__wt_msg(session, + "checkpoint txn ID: %" PRIu64, txn_global->checkpoint_txnid)); + + WT_ORDERED_READ(session_cnt, conn->session_cnt); + WT_RET(__wt_msg(session, "session count: %" PRIu32, session_cnt)); + + WT_RET(__wt_msg(session, "Transaction state of active sessions:")); + + /* + * Walk each session transaction state and dump information. Accessing + * the content of session handles is not thread safe, so some + * information may change while traversing if other threads are active + * at the same time, which is OK since this is diagnostic code. 
+ */ + for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) { + /* Skip sessions with no active transaction */ + if ((id = s->id) == WT_TXN_NONE && s->pinned_id == WT_TXN_NONE) + continue; + + txn = &conn->sessions[i].txn; + iso_tag = "INVALID"; + switch (txn->isolation) { + case WT_ISO_READ_COMMITTED: + iso_tag = "WT_ISO_READ_COMMITTED"; + break; + case WT_ISO_READ_UNCOMMITTED: + iso_tag = "WT_ISO_READ_UNCOMMITTED"; + break; + case WT_ISO_SNAPSHOT: + iso_tag = "WT_ISO_SNAPSHOT"; + break; + } + + WT_RET(__wt_msg(session, + "ID: %6" PRIu64 + ", mod count: %u" + ", pinned ID: %" PRIu64 + ", snap min: %" PRIu64 + ", snap max: %" PRIu64 + ", metadata pinned ID: %" PRIu64 + ", flags: 0x%08" PRIx32 + ", name: %s" + ", isolation: %s", + id, + txn->mod_count, + s->pinned_id, + txn->snap_min, + txn->snap_max, + s->metadata_pinned, + txn->flags, + conn->sessions[i].name == NULL ? + "EMPTY" : conn->sessions[i].name, + iso_tag)); + } + WT_RET(__wt_msg(session, "%s", WT_DIVIDER)); + + return (0); +} +#endif diff --git a/test/suite/test_reconfig04.py b/test/suite/test_reconfig04.py index be5e6d3729e..51d9b91c1f4 100644 --- a/test/suite/test_reconfig04.py +++ b/test/suite/test_reconfig04.py @@ -26,9 +26,7 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. 
-import fnmatch, os, time import wiredtiger, wttest -from wtdataset import SimpleDataSet # test_reconfig04.py # Test WT_SESSION::reconfigure -- cgit v1.2.1 From 8aa3922883e7f3d4a9003211faf595250c3bbfdd Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 24 Jan 2017 22:07:16 -0500 Subject: WT-3097 Avoid waiting for threads to timeout during close (#3253) * Add run-time flags checking to __wt_cond_wait_signal(), and its wrappers (__wt_cond_wait(), __wt_cond_auto_wait_signal() and __wt_cond_auto_wait()) so callers of those functions can configure a check that ensures that if the waiting thread races with a waking thread that's turned off flags so the waiting thread quits, the waiting thread returns immediately. * Rework the WT_SESSION.transaction_sync code to wait for the entire time it's configured to wait, it will be awoken if the log reaches stability before that. * Assert we're not waiting longer than a second if not checking the run status. * Set/Clear WT_CONN_LOG_SERVER_RUN in __wt_logmgr_open/__wt_logmgr_destroy rather than in the connection open code. (It's the only server-run flag that gets set in the connection-open code, and I can't see any reason for that exception.) 
--- dist/api_data.py | 2 +- dist/s_string.ok | 4 +++ src/async/async_api.c | 5 ++- src/async/async_worker.c | 2 +- src/conn/conn_cache.c | 6 ++-- src/conn/conn_cache_pool.c | 8 ++--- src/conn/conn_ckpt.c | 26 +++++++++++---- src/conn/conn_handle.c | 2 +- src/conn/conn_log.c | 50 +++++++++++++---------------- src/conn/conn_open.c | 17 ++++++---- src/conn/conn_stat.c | 25 +++++++++++---- src/conn/conn_sweep.c | 24 +++++++++++--- src/evict/evict_lru.c | 16 ++++++---- src/include/extern.h | 8 ++--- src/include/extern_posix.h | 4 +-- src/include/extern_win.h | 4 +-- src/include/misc.i | 5 +-- src/include/mutex.h | 4 +-- src/include/wiredtiger.in | 2 +- src/log/log.c | 21 ++++++------ src/log/log_slot.c | 2 +- src/lsm/lsm_worker.c | 2 +- src/os_posix/os_mtx_cond.c | 28 ++++++++++++---- src/os_win/os_mtx_cond.c | 43 +++++++++++++++++-------- src/session/session_api.c | 38 ++++++++++++++-------- src/support/cond_auto.c | 80 ++++++++++------------------------------------ src/support/thread_group.c | 2 +- 27 files changed, 237 insertions(+), 193 deletions(-) diff --git a/dist/api_data.py b/dist/api_data.py index b1332320a7c..1d669fa7fe0 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -718,7 +718,7 @@ wiredtiger_open_common =\ ]), Config('extensions', '', r''' list of shared library extensions to load (using dlopen). 
- Any values specified to an library extension are passed to + Any values specified to a library extension are passed to WT_CONNECTION::load_extension as the \c config parameter (for example, extensions=(/path/ext.so={entry=my_entry}))''', diff --git a/dist/s_string.ok b/dist/s_string.ok index 2b998c27813..bb0cacd9d5d 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -1217,6 +1217,7 @@ upg uri uri's uris +usec usecs usedp userbad @@ -1247,6 +1248,9 @@ vunpack vw vxr waitpid +waker +wakeup +wakeups walk's warmup wb diff --git a/src/async/async_api.c b/src/async/async_api.c index 54bcb7cd26c..026a008188c 100644 --- a/src/async/async_api.c +++ b/src/async/async_api.c @@ -240,8 +240,7 @@ __async_start(WT_SESSION_IMPL *session) async = conn->async; TAILQ_INIT(&async->formatqh); WT_RET(__wt_spin_init(session, &async->ops_lock, "ops")); - WT_RET(__wt_cond_alloc( - session, "async flush", false, &async->flush_cond)); + WT_RET(__wt_cond_alloc(session, "async flush", &async->flush_cond)); WT_RET(__wt_async_op_init(session)); /* @@ -541,7 +540,7 @@ retry: async->flush_op.state = WT_ASYNCOP_READY; WT_RET(__wt_async_op_enqueue(session, &async->flush_op)); while (async->flush_state != WT_ASYNC_FLUSH_COMPLETE) - __wt_cond_wait(session, async->flush_cond, 100000); + __wt_cond_wait(session, async->flush_cond, 100000, NULL); /* * Flush is done. Clear the flags. 
*/ diff --git a/src/async/async_worker.c b/src/async/async_worker.c index b1bc3902f7c..11f59ed14f1 100644 --- a/src/async/async_worker.c +++ b/src/async/async_worker.c @@ -107,7 +107,7 @@ __async_flush_wait(WT_SESSION_IMPL *session, WT_ASYNC *async, uint64_t my_gen) { while (async->flush_state == WT_ASYNC_FLUSHING && async->flush_gen == my_gen) - __wt_cond_wait(session, async->flush_cond, 10000); + __wt_cond_wait(session, async->flush_cond, 10000, NULL); } /* diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c index 2b0e5081f04..28dd06332e0 100644 --- a/src/conn/conn_cache.c +++ b/src/conn/conn_cache.c @@ -187,8 +187,8 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_RET_MSG(session, EINVAL, "eviction target must be lower than the eviction trigger"); - WT_RET(__wt_cond_auto_alloc(session, "cache eviction server", - false, 10000, WT_MILLION, &cache->evict_cond)); + WT_RET(__wt_cond_auto_alloc(session, + "cache eviction server", 10000, WT_MILLION, &cache->evict_cond)); WT_RET(__wt_spin_init(session, &cache->evict_pass_lock, "evict pass")); WT_RET(__wt_spin_init(session, &cache->evict_queue_lock, "cache eviction queue")); @@ -312,7 +312,7 @@ __wt_cache_destroy(WT_SESSION_IMPL *session) cache->bytes_dirty_intl + cache->bytes_dirty_leaf, cache->pages_dirty_intl + cache->pages_dirty_leaf); - WT_TRET(__wt_cond_auto_destroy(session, &cache->evict_cond)); + WT_TRET(__wt_cond_destroy(session, &cache->evict_cond)); __wt_spin_destroy(session, &cache->evict_pass_lock); __wt_spin_destroy(session, &cache->evict_queue_lock); __wt_spin_destroy(session, &cache->evict_walk_lock); diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c index 79c2fc23da5..49b766f4602 100644 --- a/src/conn/conn_cache_pool.c +++ b/src/conn/conn_cache_pool.c @@ -32,7 +32,7 @@ */ #define WT_CACHE_POOL_APP_EVICT_MULTIPLIER 3 #define WT_CACHE_POOL_APP_WAIT_MULTIPLIER 6 -#define WT_CACHE_POOL_READ_MULTIPLIER 1 +#define WT_CACHE_POOL_READ_MULTIPLIER 1 static void 
__cache_pool_adjust( WT_SESSION_IMPL *, uint64_t, uint64_t, bool, bool *); @@ -104,8 +104,8 @@ __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) TAILQ_INIT(&cp->cache_pool_qh); WT_ERR(__wt_spin_init( session, &cp->cache_pool_lock, "cache shared pool")); - WT_ERR(__wt_cond_alloc(session, - "cache pool server", false, &cp->cache_pool_cond)); + WT_ERR(__wt_cond_alloc( + session, "cache pool server", &cp->cache_pool_cond)); __wt_process.cache_pool = cp; __wt_verbose(session, @@ -733,7 +733,7 @@ __wt_cache_pool_server(void *arg) F_ISSET(cache, WT_CACHE_POOL_RUN)) { if (cp->currently_used <= cp->size) __wt_cond_wait( - session, cp->cache_pool_cond, WT_MILLION); + session, cp->cache_pool_cond, WT_MILLION, NULL); /* * Re-check pool run flag - since we want to avoid getting the diff --git a/src/conn/conn_ckpt.c b/src/conn/conn_ckpt.c index faeef4e71a2..7797ed4421c 100644 --- a/src/conn/conn_ckpt.c +++ b/src/conn/conn_ckpt.c @@ -62,6 +62,16 @@ __ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, bool *startp) return (0); } +/* + * __ckpt_server_run_chk -- + * Check to decide if the checkpoint server should continue running. + */ +static bool +__ckpt_server_run_chk(WT_SESSION_IMPL *session) +{ + return (F_ISSET(S2C(session), WT_CONN_SERVER_CHECKPOINT)); +} + /* * __ckpt_server -- * The checkpoint server thread. @@ -78,14 +88,18 @@ __ckpt_server(void *arg) conn = S2C(session); wt_session = (WT_SESSION *)session; - while (F_ISSET(conn, WT_CONN_SERVER_RUN) && - F_ISSET(conn, WT_CONN_SERVER_CHECKPOINT)) { + for (;;) { /* * Wait... * NOTE: If the user only configured logsize, then usecs * will be 0 and this wait won't return until signalled. */ - __wt_cond_wait(session, conn->ckpt_cond, conn->ckpt_usecs); + __wt_cond_wait(session, + conn->ckpt_cond, conn->ckpt_usecs, __ckpt_server_run_chk); + + /* Check if we're quitting or being reconfigured. 
*/ + if (!__ckpt_server_run_chk(session)) + break; /* * Checkpoint the database if the connection is marked dirty. @@ -113,7 +127,8 @@ __ckpt_server(void *arg) * it so we don't do another checkpoint * immediately. */ - __wt_cond_wait(session, conn->ckpt_cond, 1); + __wt_cond_wait( + session, conn->ckpt_cond, 1, NULL); } } else WT_STAT_CONN_INCR(session, txn_checkpoint_skipped); @@ -152,8 +167,7 @@ __ckpt_server_start(WT_CONNECTION_IMPL *conn) "checkpoint-server", true, session_flags, &conn->ckpt_session)); session = conn->ckpt_session; - WT_RET(__wt_cond_alloc( - session, "checkpoint server", false, &conn->ckpt_cond)); + WT_RET(__wt_cond_alloc(session, "checkpoint server", &conn->ckpt_cond)); /* * Start the thread. diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index 7203b75e4ae..54bcfd98aba 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -79,7 +79,7 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_RET(__wt_spin_init( session, &conn->lsm_manager.switch_lock, "LSM switch queue lock")); WT_RET(__wt_cond_alloc( - session, "LSM worker cond", false, &conn->lsm_manager.work_cond)); + session, "LSM worker cond", &conn->lsm_manager.work_cond)); /* * Generation numbers. 
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index 8f8f8614ba8..c6dd795389d 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -174,7 +174,7 @@ __logmgr_config( WT_RET(__logmgr_sync_cfg(session, cfg)); if (conn->log_cond != NULL) - __wt_cond_auto_signal(session, conn->log_cond); + __wt_cond_signal(session, conn->log_cond); return (0); } @@ -341,7 +341,7 @@ __wt_log_truncate_files( conn = S2C(session); if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) return (0); - if (F_ISSET(conn, WT_CONN_SERVER_RUN) && + if (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN) && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) WT_RET_MSG(session, EINVAL, "Attempt to archive manually while a server is running"); @@ -505,8 +505,7 @@ __log_file_server(void *arg) locked = false; __wt_spin_unlock(session, &log->log_sync_lock); } else { - __wt_cond_auto_signal( - session, conn->log_wrlsn_cond); + __wt_cond_signal(session, conn->log_wrlsn_cond); /* * We do not want to wait potentially a second * to process this. Yield to give the wrlsn @@ -517,8 +516,9 @@ __log_file_server(void *arg) continue; } } + /* Wait until the next event. */ - __wt_cond_wait(session, conn->log_file_cond, WT_MILLION / 10); + __wt_cond_wait(session, conn->log_file_cond, 100000, NULL); } if (0) { @@ -730,12 +730,8 @@ __log_wrlsn_server(void *arg) if (yield++ < WT_THOUSAND) __wt_yield(); else - /* - * Send in false because if we did any work we would - * not be on this path. - */ __wt_cond_auto_wait( - session, conn->log_wrlsn_cond, did_work); + session, conn->log_wrlsn_cond, did_work, NULL); } /* * On close we need to do this one more time because there could @@ -840,10 +836,9 @@ __log_server(void *arg) } /* Wait until the next event. 
*/ - __wt_epoch(session, &start); - __wt_cond_auto_wait_signal(session, - conn->log_cond, did_work, &signalled); + __wt_cond_auto_wait_signal( + session, conn->log_cond, did_work, NULL, &signalled); __wt_epoch(session, &now); timediff = WT_TIMEDIFF_MS(now, start); } @@ -904,10 +899,8 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_INIT_LSN(&log->write_lsn); WT_INIT_LSN(&log->write_start_lsn); log->fileid = 0; - WT_RET(__wt_cond_alloc( - session, "log sync", false, &log->log_sync_cond)); - WT_RET(__wt_cond_alloc( - session, "log write", false, &log->log_write_cond)); + WT_RET(__wt_cond_alloc(session, "log sync", &log->log_sync_cond)); + WT_RET(__wt_cond_alloc(session, "log write", &log->log_write_cond)); WT_RET(__wt_log_open(session)); WT_RET(__wt_log_slot_init(session)); @@ -930,6 +923,8 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) return (0); + F_SET(conn, WT_CONN_LOG_SERVER_RUN); + /* * Start the log close thread. It is not configurable. * If logging is enabled, this thread runs. @@ -937,8 +932,8 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) session_flags = WT_SESSION_NO_DATA_HANDLES; WT_RET(__wt_open_internal_session(conn, "log-close-server", false, session_flags, &conn->log_file_session)); - WT_RET(__wt_cond_alloc(conn->log_file_session, - "log close server", false, &conn->log_file_cond)); + WT_RET(__wt_cond_alloc( + conn->log_file_session, "log close server", &conn->log_file_cond)); /* * Start the log file close thread. 
@@ -954,8 +949,7 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) WT_RET(__wt_open_internal_session(conn, "log-wrlsn-server", false, session_flags, &conn->log_wrlsn_session)); WT_RET(__wt_cond_auto_alloc(conn->log_wrlsn_session, - "log write lsn server", false, 10000, WT_MILLION, - &conn->log_wrlsn_cond)); + "log write lsn server", 10000, WT_MILLION, &conn->log_wrlsn_cond)); WT_RET(__wt_thread_create(conn->log_wrlsn_session, &conn->log_wrlsn_tid, __log_wrlsn_server, conn->log_wrlsn_session)); conn->log_wrlsn_tid_set = true; @@ -969,13 +963,13 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) if (conn->log_session != NULL) { WT_ASSERT(session, conn->log_cond != NULL); WT_ASSERT(session, conn->log_tid_set == true); - __wt_cond_auto_signal(session, conn->log_cond); + __wt_cond_signal(session, conn->log_cond); } else { /* The log server gets its own session. */ WT_RET(__wt_open_internal_session(conn, "log-server", false, session_flags, &conn->log_session)); WT_RET(__wt_cond_auto_alloc(conn->log_session, - "log server", false, 50000, WT_MILLION, &conn->log_cond)); + "log server", 50000, WT_MILLION, &conn->log_cond)); /* * Start the thread. 
@@ -1001,6 +995,8 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) conn = S2C(session); + F_CLR(conn, WT_CONN_LOG_SERVER_RUN); + if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) { /* * We always set up the log_path so printlog can work without @@ -1011,7 +1007,7 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) return (0); } if (conn->log_tid_set) { - __wt_cond_auto_signal(session, conn->log_cond); + __wt_cond_signal(session, conn->log_cond); WT_TRET(__wt_thread_join(session, conn->log_tid)); conn->log_tid_set = false; } @@ -1026,7 +1022,7 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) conn->log_file_session = NULL; } if (conn->log_wrlsn_tid_set) { - __wt_cond_auto_signal(session, conn->log_wrlsn_cond); + __wt_cond_signal(session, conn->log_wrlsn_cond); WT_TRET(__wt_thread_join(session, conn->log_wrlsn_tid)); conn->log_wrlsn_tid_set = false; } @@ -1047,9 +1043,9 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) } /* Destroy the condition variables now that all threads are stopped */ - WT_TRET(__wt_cond_auto_destroy(session, &conn->log_cond)); + WT_TRET(__wt_cond_destroy(session, &conn->log_cond)); WT_TRET(__wt_cond_destroy(session, &conn->log_file_cond)); - WT_TRET(__wt_cond_auto_destroy(session, &conn->log_wrlsn_cond)); + WT_TRET(__wt_cond_destroy(session, &conn->log_wrlsn_cond)); WT_TRET(__wt_cond_destroy(session, &conn->log->log_sync_cond)); WT_TRET(__wt_cond_destroy(session, &conn->log->log_write_cond)); diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c index f8029f2c728..5b20377d437 100644 --- a/src/conn/conn_open.c +++ b/src/conn/conn_open.c @@ -25,7 +25,7 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) * Tell internal server threads to run: this must be set before opening * any sessions. */ - F_SET(conn, WT_CONN_SERVER_RUN | WT_CONN_LOG_SERVER_RUN); + F_SET(conn, WT_CONN_SERVER_RUN); /* WT_SESSION_IMPL array. 
*/ WT_RET(__wt_calloc(session, @@ -100,8 +100,12 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) __wt_yield(); } - /* Clear any pending async ops. */ + /* + * Clear any pending async operations and shut down the async worker + * threads and system before closing LSM. + */ WT_TRET(__wt_async_flush(session)); + WT_TRET(__wt_async_destroy(session)); /* * Shut down server threads other than the eviction server, which is @@ -110,14 +114,14 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) * exit before files are closed. */ F_CLR(conn, WT_CONN_SERVER_RUN); - WT_TRET(__wt_async_destroy(session)); WT_TRET(__wt_lsm_manager_destroy(session)); - WT_TRET(__wt_sweep_destroy(session)); F_SET(conn, WT_CONN_CLOSING); - WT_TRET(__wt_checkpoint_server_destroy(session)); WT_TRET(__wt_statlog_destroy(session, true)); + WT_TRET(__wt_sweep_destroy(session)); + + /* The eviction server is shut down last. */ WT_TRET(__wt_evict_destroy(session)); /* Shut down the lookaside table, after all eviction is complete. */ @@ -126,7 +130,7 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) /* Close open data handles. */ WT_TRET(__wt_conn_dhandle_discard(session)); - /* Shut down metadata tracking, required before creating tables. */ + /* Shut down metadata tracking. */ WT_TRET(__wt_meta_track_destroy(session)); /* @@ -140,7 +144,6 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE)) WT_TRET(__wt_txn_checkpoint_log( session, true, WT_TXN_LOG_CKPT_STOP, NULL)); - F_CLR(conn, WT_CONN_LOG_SERVER_RUN); WT_TRET(__wt_logmgr_destroy(session)); /* Free memory for collators, compressors, data sources. 
*/ diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c index 3bcdfd7ecb1..31dc9c45992 100644 --- a/src/conn/conn_stat.c +++ b/src/conn/conn_stat.c @@ -485,8 +485,7 @@ __statlog_on_close(WT_SESSION_IMPL *session) if (!FLD_ISSET(conn->stat_flags, WT_STAT_ON_CLOSE)) return (0); - if (F_ISSET(conn, WT_CONN_SERVER_RUN) && - F_ISSET(conn, WT_CONN_SERVER_STATISTICS)) + if (F_ISSET(conn, WT_CONN_SERVER_STATISTICS)) WT_RET_MSG(session, EINVAL, "Attempt to log statistics while a server is running"); @@ -497,6 +496,16 @@ err: __wt_scr_free(session, &tmp); return (ret); } +/* + * __statlog_server_run_chk -- + * Check to decide if the statistics log server should continue running. + */ +static bool +__statlog_server_run_chk(WT_SESSION_IMPL *session) +{ + return (F_ISSET(S2C(session), WT_CONN_SERVER_STATISTICS)); +} + /* * __statlog_server -- * The statistics server thread. @@ -525,10 +534,14 @@ __statlog_server(void *arg) WT_ERR(__wt_buf_init(session, &path, strlen(conn->stat_path) + 128)); WT_ERR(__wt_buf_init(session, &tmp, strlen(conn->stat_path) + 128)); - while (F_ISSET(conn, WT_CONN_SERVER_RUN) && - F_ISSET(conn, WT_CONN_SERVER_STATISTICS)) { + for (;;) { /* Wait until the next event. */ - __wt_cond_wait(session, conn->stat_cond, conn->stat_usecs); + __wt_cond_wait(session, conn->stat_cond, + conn->stat_usecs, __statlog_server_run_chk); + + /* Check if we're quitting or being reconfigured. */ + if (!__statlog_server_run_chk(session)) + break; if (WT_STAT_ENABLED(session)) WT_ERR(__statlog_log_one(session, &path, &tmp)); @@ -563,7 +576,7 @@ __statlog_start(WT_CONNECTION_IMPL *conn) session = conn->stat_session; WT_RET(__wt_cond_alloc( - session, "statistics log server", false, &conn->stat_cond)); + session, "statistics log server", &conn->stat_cond)); /* * Start the thread. 
diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index 7d5cb7d7c72..f9b7305c7d8 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -245,6 +245,16 @@ __sweep_remove_handles(WT_SESSION_IMPL *session) return (ret == EBUSY ? 0 : ret); } +/* + * __sweep_server_run_chk -- + * Check to decide if the sweep server should continue running. + */ +static bool +__sweep_server_run_chk(WT_SESSION_IMPL *session) +{ + return (F_ISSET(S2C(session), WT_CONN_SERVER_SWEEP)); +} + /* * __sweep_server -- * The handle sweep server thread. @@ -266,11 +276,15 @@ __sweep_server(void *arg) /* * Sweep for dead and excess handles. */ - while (F_ISSET(conn, WT_CONN_SERVER_RUN) && - F_ISSET(conn, WT_CONN_SERVER_SWEEP)) { + for (;;) { /* Wait until the next event. */ - __wt_cond_wait(session, - conn->sweep_cond, conn->sweep_interval * WT_MILLION); + __wt_cond_wait(session, conn->sweep_cond, + conn->sweep_interval * WT_MILLION, __sweep_server_run_chk); + + /* Check if we're quitting or being reconfigured.
*/ + if (!__sweep_server_run_chk(session)) + break; + __wt_seconds(session, &now); WT_STAT_CONN_INCR(session, dh_sweeps); @@ -390,7 +404,7 @@ __wt_sweep_create(WT_SESSION_IMPL *session) session = conn->sweep_session; WT_RET(__wt_cond_alloc( - session, "handle sweep server", false, &conn->sweep_cond)); + session, "handle sweep server", &conn->sweep_cond)); WT_RET(__wt_thread_create( session, &conn->sweep_tid, __sweep_server, session)); diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 0cf746f84eb..48ea1ccb02b 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -267,7 +267,7 @@ __wt_evict_server_wake(WT_SESSION_IMPL *session) } #endif - __wt_cond_auto_signal(session, cache->evict_cond); + __wt_cond_signal(session, cache->evict_cond); } /* @@ -311,9 +311,10 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) __wt_spin_unlock(session, &cache->evict_pass_lock); WT_ERR(ret); __wt_verbose(session, WT_VERB_EVICTSERVER, "sleeping"); + /* Don't rely on signals: check periodically. */ __wt_cond_auto_wait( - session, cache->evict_cond, did_work); + session, cache->evict_cond, did_work, NULL); __wt_verbose(session, WT_VERB_EVICTSERVER, "waking"); } else WT_ERR(__evict_lru_pages(session, false)); @@ -712,8 +713,8 @@ __evict_pass(WT_SESSION_IMPL *session) */ WT_STAT_CONN_INCR(session, cache_eviction_server_slept); - __wt_cond_wait( - session, cache->evict_cond, WT_THOUSAND); + __wt_cond_wait(session, + cache->evict_cond, WT_THOUSAND, NULL); continue; } @@ -1102,7 +1103,8 @@ __evict_lru_pages(WT_SESSION_IMPL *session, bool is_server) /* If a worker thread found the queue empty, pause. */ if (ret == WT_NOTFOUND && !is_server && F_ISSET(S2C(session), WT_CONN_EVICTION_RUN)) - __wt_cond_wait(session, conn->evict_threads.wait_cond, 10000); + __wt_cond_wait( + session, conn->evict_threads.wait_cond, 10000, NULL); return (ret == WT_NOTFOUND ? 
0 : ret); } @@ -2102,8 +2104,8 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) break; case WT_NOTFOUND: /* Allow the queue to re-populate before retrying. */ - __wt_cond_wait( - session, conn->evict_threads.wait_cond, 10000); + __wt_cond_wait(session, + conn->evict_threads.wait_cond, 10000, NULL); cache->app_waits++; break; default: diff --git a/src/include/extern.h b/src/include/extern.h index 88fb8823930..eb2f9a0e784 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -613,11 +613,9 @@ extern void __wt_session_close_cache(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ extern int __wt_session_get_btree(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cond_auto_alloc( WT_SESSION_IMPL *session, const char *name, bool is_signalled, uint64_t min, uint64_t max, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cond_auto_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cond_auto_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cond_auto_wait( WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cond_auto_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_cond_auto_alloc(WT_SESSION_IMPL *session, const char *name, uint64_t min, uint64_t max, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_cond_auto_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_cond_auto_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_decrypt(WT_SESSION_IMPL *session, WT_ENCRYPTOR *encryptor, size_t skip, WT_ITEM *in, WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_encrypt(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t skip, WT_ITEM *in, WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_encrypt_size(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/extern_posix.h b/src/include/extern_posix.h index 5acb7b0ed27..fed7835ada1 100644 --- a/src/include/extern_posix.h +++ b/src/include/extern_posix.h @@ -12,8 +12,8 @@ extern int __wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapp extern int __wt_posix_map_preload(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_posix_map_discard(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_region, size_t len, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/extern_win.h b/src/include/extern_win.h index 11b45f11304..0bfc821c7a6 100644 --- a/src/include/extern_win.h +++ b/src/include/extern_win.h @@ -10,8 +10,8 @@ extern int __wt_os_win(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((war extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_win_map(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_region, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/misc.i b/src/include/misc.i index f36be32d6a2..d5692a3f9cf 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -11,11 +11,12 @@ * Wait on a mutex, optionally timing out. 
*/ static inline void -__wt_cond_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs) +__wt_cond_wait(WT_SESSION_IMPL *session, + WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *)) { bool notused; - __wt_cond_wait_signal(session, cond, usecs, &notused); + __wt_cond_wait_signal(session, cond, usecs, run_func, &notused); } /* diff --git a/src/include/mutex.h b/src/include/mutex.h index 727a690bb1c..06b8c4a3304 100644 --- a/src/include/mutex.h +++ b/src/include/mutex.h @@ -21,8 +21,8 @@ struct __wt_condvar { int waiters; /* Numbers of waiters, or -1 if signalled with no waiters. */ /* - * The following fields are only used for automatically adjusting - * condition variables. They could be in a separate structure. + * The following fields are used for automatically adjusting condition + * variable wait times. */ uint64_t min_wait; /* Minimum wait duration */ uint64_t max_wait; /* Maximum wait duration */ diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 03bff7cd04f..f05d3d4ab55 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -2362,7 +2362,7 @@ struct __wt_connection { * @config{exclusive, fail if the database already exists\, generally used with * the \c create option., a boolean flag; default \c false.} * @config{extensions, list of shared library extensions to load (using dlopen). 
- * Any values specified to an library extension are passed to + * Any values specified to a library extension are passed to * WT_CONNECTION::load_extension as the \c config parameter (for example\, * extensions=(/path/ext.so={entry=my_entry}))., a list of strings; * default empty.} diff --git a/src/log/log.c b/src/log/log.c index da500a74e87..614ae1a9b6d 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -43,11 +43,11 @@ __log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) */ if (F_ISSET(session, WT_SESSION_LOCKED_SLOT)) __wt_spin_unlock(session, &log->log_slot_lock); - __wt_cond_auto_signal(session, conn->log_wrlsn_cond); + __wt_cond_signal(session, conn->log_wrlsn_cond); if (++yield_count < WT_THOUSAND) __wt_yield(); else - __wt_cond_wait(session, log->log_write_cond, 200); + __wt_cond_wait(session, log->log_write_cond, 200, NULL); if (F_ISSET(session, WT_SESSION_LOCKED_SLOT)) __wt_spin_lock(session, &log->log_slot_lock); } @@ -89,7 +89,7 @@ __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn) log = conn->log; log->ckpt_lsn = *ckp_lsn; if (conn->log_cond != NULL) - __wt_cond_auto_signal(session, conn->log_cond); + __wt_cond_signal(session, conn->log_cond); } /* @@ -170,7 +170,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) */ while (log->sync_lsn.l.file < min_lsn->l.file) { __wt_cond_signal(session, S2C(session)->log_file_cond); - __wt_cond_wait(session, log->log_sync_cond, 10000); + __wt_cond_wait(session, log->log_sync_cond, 10000, NULL); } __wt_spin_lock(session, &log->log_sync_lock); WT_ASSERT(session, log->log_dir_fh != NULL); @@ -915,7 +915,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) else { WT_STAT_CONN_INCR(session, log_prealloc_missed); if (conn->log_cond != NULL) - __wt_cond_auto_signal( + __wt_cond_signal( session, conn->log_cond); } } @@ -1490,7 +1490,8 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) */ if (log->sync_lsn.l.file < 
slot->slot_end_lsn.l.file || __wt_spin_trylock(session, &log->log_sync_lock) != 0) { - __wt_cond_wait(session, log->log_sync_cond, 10000); + __wt_cond_wait( + session, log->log_sync_cond, 10000, NULL); continue; } locked = true; @@ -2160,7 +2161,7 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, * XXX I've seen times when conditions are NULL. */ if (conn->log_cond != NULL) { - __wt_cond_auto_signal(session, conn->log_cond); + __wt_cond_signal(session, conn->log_cond); __wt_yield(); } else WT_ERR(__wt_log_force_write(session, 1, NULL)); @@ -2169,12 +2170,14 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, /* Wait for our writes to reach the OS */ while (__wt_log_cmp(&log->write_lsn, &lsn) <= 0 && myslot.slot->slot_error == 0) - __wt_cond_wait(session, log->log_write_cond, 10000); + __wt_cond_wait( + session, log->log_write_cond, 10000, NULL); } else if (LF_ISSET(WT_LOG_FSYNC)) { /* Wait for our writes to reach disk */ while (__wt_log_cmp(&log->sync_lsn, &lsn) <= 0 && myslot.slot->slot_error == 0) - __wt_cond_wait(session, log->log_sync_cond, 10000); + __wt_cond_wait( + session, log->log_sync_cond, 10000, NULL); } /* diff --git a/src/log/log_slot.c b/src/log/log_slot.c index d70c0d689be..d6e692f8c51 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -349,7 +349,7 @@ __wt_log_slot_new(WT_SESSION_IMPL *session) /* * If we didn't find any free slots signal the worker thread. */ - __wt_cond_auto_signal(session, conn->log_wrlsn_cond); + __wt_cond_signal(session, conn->log_wrlsn_cond); __wt_yield(); #ifdef HAVE_DIAGNOSTIC ++count; diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index b0d0758775d..ffa00c0a5e7 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -154,7 +154,7 @@ __lsm_worker(void *arg) /* Don't busy wait if there was any work to do. 
*/ if (!progress) { - __wt_cond_wait(session, cookie->work_cond, 10000); + __wt_cond_wait(session, cookie->work_cond, 10000, NULL); continue; } } diff --git a/src/os_posix/os_mtx_cond.c b/src/os_posix/os_mtx_cond.c index be8b1abda31..a5ee78f9e3e 100644 --- a/src/os_posix/os_mtx_cond.c +++ b/src/os_posix/os_mtx_cond.c @@ -13,8 +13,7 @@ * Allocate and initialize a condition variable. */ int -__wt_cond_alloc(WT_SESSION_IMPL *session, - const char *name, bool is_signalled, WT_CONDVAR **condp) +__wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) { WT_CONDVAR *cond; WT_DECL_RET; @@ -27,7 +26,7 @@ __wt_cond_alloc(WT_SESSION_IMPL *session, WT_ERR(pthread_cond_init(&cond->cond, NULL)); cond->name = name; - cond->waiters = is_signalled ? -1 : 0; + cond->waiters = 0; *condp = cond; return (0); @@ -42,8 +41,8 @@ err: __wt_free(session, cond); * out period expires, let the caller know. */ void -__wt_cond_wait_signal( - WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled) +__wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, + uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) { struct timespec ts; WT_DECL_RET; @@ -62,6 +61,23 @@ __wt_cond_wait_signal( WT_ERR(pthread_mutex_lock(&cond->mtx)); locked = true; + /* + * It's possible to race with threads waking us up. That's not a problem + * if there are multiple wakeups because the next wakeup will get us, or + * if we're only pausing for a short period. It's a problem if there's + * only a single wakeup, our waker is likely waiting for us to exit. + * After acquiring the mutex (so we're guaranteed to be awakened by any + * future wakeup call), optionally check if we're OK to keep running. + * This won't ensure our caller won't just loop and call us again, but + * at least it's not our fault. + * + * Assert we're not waiting longer than a second if not checking the + * run status. 
+ */ + WT_ASSERT(session, run_func != NULL || usecs <= WT_MILLION); + if (run_func != NULL && !run_func(session)) + goto skipping; + if (usecs > 0) { __wt_epoch(session, &ts); ts.tv_sec += (time_t) @@ -81,7 +97,7 @@ __wt_cond_wait_signal( ret == ETIME || #endif ret == ETIMEDOUT) { - *signalled = false; +skipping: *signalled = false; ret = 0; } diff --git a/src/os_win/os_mtx_cond.c b/src/os_win/os_mtx_cond.c index 79c62ccd7f2..0001c6c2322 100644 --- a/src/os_win/os_mtx_cond.c +++ b/src/os_win/os_mtx_cond.c @@ -13,8 +13,7 @@ * Allocate and initialize a condition variable. */ int -__wt_cond_alloc(WT_SESSION_IMPL *session, - const char *name, bool is_signalled, WT_CONDVAR **condp) +__wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) { WT_CONDVAR *cond; @@ -26,7 +25,7 @@ __wt_cond_alloc(WT_SESSION_IMPL *session, InitializeConditionVariable(&cond->cond); cond->name = name; - cond->waiters = is_signalled ? -1 : 0; + cond->waiters = 0; *condp = cond; return (0); @@ -38,8 +37,8 @@ __wt_cond_alloc(WT_SESSION_IMPL *session, * out period expires, let the caller know. */ void -__wt_cond_wait_signal( - WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled) +__wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, + uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) { BOOL sleepret; DWORD milliseconds, windows_error; @@ -59,8 +58,26 @@ __wt_cond_wait_signal( EnterCriticalSection(&cond->mtx); locked = true; + /* + * It's possible to race with threads waking us up. That's not a problem + * if there are multiple wakeups because the next wakeup will get us, or + * if we're only pausing for a short period. It's a problem if there's + * only a single wakeup, our waker is likely waiting for us to exit. + * After acquiring the mutex (so we're guaranteed to be awakened by any + * future wakeup call), optionally check if we're OK to keep running. 
+ * This won't ensure our caller won't just loop and call us again, but + * at least it's not our fault. + * + * Assert we're not waiting longer than a second if not checking the + * run status. + */ + WT_ASSERT(session, run_func != NULL || usecs <= WT_MILLION); + + if (run_func != NULL && !run_func(session)) + goto skipping; + if (usecs > 0) { - milliseconds64 = usecs / 1000; + milliseconds64 = usecs / WT_THOUSAND; /* * Check for 32-bit unsigned integer overflow @@ -90,7 +107,7 @@ __wt_cond_wait_signal( if (sleepret == 0) { windows_error = __wt_getlasterror(); if (windows_error == ERROR_TIMEOUT) { - *signalled = false; +skipping: *signalled = false; sleepret = 1; } } @@ -117,17 +134,17 @@ void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) { WT_DECL_RET; - bool locked; - - locked = false; __wt_verbose(session, WT_VERB_MUTEX, "signal %s", cond->name); /* - * Our callers are often setting flags to cause a thread to exit. Add - * a barrier to ensure the flags are seen by the threads. + * Our callers often set flags to cause a thread to exit. Add a barrier + * to ensure exit flags are seen by the sleeping threads, otherwise we + * can wake up a thread, it immediately goes back to sleep, and we'll + * hang. Use a full barrier (we may not write before waiting on thread + * join). */ - WT_WRITE_BARRIER(); + WT_FULL_BARRIER(); /* * Fast path if we are in (or can enter), a state where the next waiter diff --git a/src/session/session_api.c b/src/session/session_api.c index fcbfa8809b3..71626e098cb 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -1488,6 +1488,20 @@ __session_transaction_pinned_range(WT_SESSION *wt_session, uint64_t *prange) err: API_END_RET(session, ret); } +/* + * __transaction_sync_run_chk -- + * Check to decide if the transaction sync call should continue running. 
+ */ +static bool +__transaction_sync_run_chk(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); + + return (FLD_ISSET(conn->flags, WT_CONN_LOG_SERVER_RUN)); +} + /* * __session_transaction_sync -- * WT_SESSION->transaction_sync method. @@ -1502,7 +1516,7 @@ __session_transaction_sync(WT_SESSION *wt_session, const char *config) WT_SESSION_IMPL *session; WT_TXN *txn; struct timespec now, start; - uint64_t timeout_ms, waited_ms; + uint64_t remaining_usec, timeout_ms, waited_ms; bool forever; session = (WT_SESSION_IMPL *)wt_session; @@ -1555,22 +1569,20 @@ __session_transaction_sync(WT_SESSION *wt_session, const char *config) __wt_epoch(session, &start); /* * Keep checking the LSNs until we find it is stable or we reach - * our timeout. + * our timeout, or there's some other reason to quit. */ while (__wt_log_cmp(&session->bg_sync_lsn, &log->sync_lsn) > 0) { + if (!__transaction_sync_run_chk(session)) + WT_ERR(ETIMEDOUT); + __wt_cond_signal(session, conn->log_file_cond); __wt_epoch(session, &now); waited_ms = WT_TIMEDIFF_MS(now, start); - if (forever || waited_ms < timeout_ms) - /* - * Note, we will wait an increasing amount of time - * each iteration, likely doubling. Also note that - * the function timeout value is in usecs (we are - * computing the wait time in msecs and passing that - * in, unchanged, as the usecs to wait). 
- */ - __wt_cond_wait(session, log->log_sync_cond, waited_ms); - else + if (forever || waited_ms < timeout_ms) { + remaining_usec = (timeout_ms - waited_ms) * WT_THOUSAND; + __wt_cond_wait(session, log->log_sync_cond, + remaining_usec, __transaction_sync_run_chk); + } else WT_ERR(ETIMEDOUT); } @@ -1825,7 +1837,7 @@ __open_session(WT_CONNECTION_IMPL *conn, session_ret->name = NULL; session_ret->id = i; - WT_ERR(__wt_cond_alloc(session, "session", false, &session_ret->cond)); + WT_ERR(__wt_cond_alloc(session, "session", &session_ret->cond)); if (WT_SESSION_FIRST_USE(session_ret)) __wt_random_init(&session_ret->rnd); diff --git a/src/support/cond_auto.c b/src/support/cond_auto.c index a3ae67f5baa..600e5eab0ff 100644 --- a/src/support/cond_auto.c +++ b/src/support/cond_auto.c @@ -1,29 +1,9 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. * - * This is free and unencumbered software released into the public domain. - * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. - * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. + * See the file LICENSE for redistribution information. */ #include "wt_internal.h" @@ -38,13 +18,12 @@ * Allocate and initialize an automatically adjusting condition variable. */ int -__wt_cond_auto_alloc( - WT_SESSION_IMPL *session, const char *name, - bool is_signalled, uint64_t min, uint64_t max, WT_CONDVAR **condp) +__wt_cond_auto_alloc(WT_SESSION_IMPL *session, + const char *name, uint64_t min, uint64_t max, WT_CONDVAR **condp) { WT_CONDVAR *cond; - WT_RET(__wt_cond_alloc(session, name, is_signalled, condp)); + WT_RET(__wt_cond_alloc(session, name, condp)); cond = *condp; cond->min_wait = min; @@ -54,34 +33,20 @@ __wt_cond_auto_alloc( return (0); } -/* - * __wt_cond_auto_signal -- - * Signal a condition variable. - */ -void -__wt_cond_auto_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) -{ - - WT_ASSERT(session, cond->min_wait != 0); - __wt_cond_signal(session, cond); -} - /* * __wt_cond_auto_wait_signal -- * Wait on a mutex, optionally timing out. If we get it before the time * out period expires, let the caller know. - * TODO: Can this version of the API be removed, now that we have the - * auto adjusting condition variables? */ void -__wt_cond_auto_wait_signal( - WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool *signalled) +__wt_cond_auto_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, + bool progress, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) { uint64_t delta; /* * Catch cases where this function is called with a condition variable - * that was initialized non-auto. + * that wasn't initialized to do automatic adjustments. 
*/ WT_ASSERT(session, cond->min_wait != 0); @@ -94,7 +59,8 @@ __wt_cond_auto_wait_signal( cond->max_wait, cond->prev_wait + delta); } - __wt_cond_wait_signal(session, cond, cond->prev_wait, signalled); + __wt_cond_wait_signal( + session, cond, cond->prev_wait, run_func, signalled); if (progress || *signalled) WT_STAT_CONN_INCR(session, cond_auto_wait_reset); @@ -108,24 +74,10 @@ __wt_cond_auto_wait_signal( * out period expires, let the caller know. */ void -__wt_cond_auto_wait( - WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress) +__wt_cond_auto_wait(WT_SESSION_IMPL *session, + WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *)) { - bool signalled; - - /* - * Call the signal version so the wait period is reset if the - * condition is woken explicitly. - */ - __wt_cond_auto_wait_signal(session, cond, progress, &signalled); -} + bool notused; -/* - * __wt_cond_auto_destroy -- - * Destroy a condition variable. - */ -int -__wt_cond_auto_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) -{ - return (__wt_cond_destroy(session, condp)); + __wt_cond_auto_wait_signal(session, cond, progress, run_func, &notused); } diff --git a/src/support/thread_group.c b/src/support/thread_group.c index beb143e63e2..2b4b7ad4e61 100644 --- a/src/support/thread_group.c +++ b/src/support/thread_group.c @@ -259,7 +259,7 @@ __wt_thread_group_create( __wt_rwlock_init(session, &group->lock); WT_ERR(__wt_cond_alloc( - session, "Thread group cond", false, &group->wait_cond)); + session, "thread group cond", &group->wait_cond)); cond_alloced = true; __wt_writelock(session, &group->lock); -- cgit v1.2.1 From 0a70661a0d33c9705509955baafded2855054a29 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Thu, 26 Jan 2017 16:54:46 -0500 Subject: WT-3156 Add check in assertions for errors. 
(#3271) --- src/log/log.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/log/log.c b/src/log/log.c index 614ae1a9b6d..1482cc0aca1 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -2202,12 +2202,12 @@ err: /* * If one of the sync flags is set, assert the proper LSN has moved to - * match. + * match on success. */ - WT_ASSERT(session, !LF_ISSET(WT_LOG_FLUSH) || + WT_ASSERT(session, ret != 0 || !LF_ISSET(WT_LOG_FLUSH) || __wt_log_cmp(&log->write_lsn, &lsn) >= 0); - WT_ASSERT(session, - !LF_ISSET(WT_LOG_FSYNC) || __wt_log_cmp(&log->sync_lsn, &lsn) >= 0); + WT_ASSERT(session, ret != 0 || !LF_ISSET(WT_LOG_FSYNC) || + __wt_log_cmp(&log->sync_lsn, &lsn) >= 0); return (ret); } -- cgit v1.2.1 From 1e24579efee68f6fdb6a4c582275a50d95d7eb81 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Wed, 1 Feb 2017 12:11:48 +1100 Subject: WT-3115 Convert the dhandle list lock into a read/write lock. (#3236) It was a spinlock, but most acquirers only need shared access and it can be a contention point in many-table workloads. Split uses of the handle list lock into small operations. In particular, only hold the handle list lock to get the "next" handle, not for loops over all the handles in the system. Update statistics around handle list lock and corresponding doc. 
--- dist/flags.py | 3 +- dist/s_stat | 3 - dist/stat_data.py | 4 +- src/conn/conn_dhandle.c | 55 ++++++----- src/conn/conn_handle.c | 4 +- src/conn/conn_stat.c | 8 +- src/conn/conn_sweep.c | 2 +- src/cursor/cur_backup.c | 8 +- src/docs/upgrading.dox | 6 ++ src/evict/evict_lru.c | 50 +++++----- src/evict/evict_stat.c | 2 +- src/include/cache.i | 2 +- src/include/connection.h | 6 +- src/include/dhandle.h | 18 ++++ src/include/extern.h | 1 + src/include/flags.h | 33 +++---- src/include/schema.h | 72 +++++++++++--- src/include/stat.h | 4 +- src/include/wiredtiger.in | 218 +++++++++++++++++++++--------------------- src/lsm/lsm_cursor.c | 4 +- src/lsm/lsm_manager.c | 12 +-- src/lsm/lsm_stat.c | 4 +- src/lsm/lsm_tree.c | 63 ++++++------ src/lsm/lsm_work_unit.c | 4 +- src/schema/schema_drop.c | 2 +- src/schema/schema_rename.c | 2 +- src/schema/schema_worker.c | 2 +- src/session/session_dhandle.c | 43 +++++---- src/support/stat.c | 16 +--- src/txn/txn_ckpt.c | 5 +- 30 files changed, 359 insertions(+), 297 deletions(-) diff --git a/dist/flags.py b/dist/flags.py index 55ce233e60d..216f7c29e0a 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -117,7 +117,8 @@ flags = { 'SESSION_CAN_WAIT', 'SESSION_INTERNAL', 'SESSION_LOCKED_CHECKPOINT', - 'SESSION_LOCKED_HANDLE_LIST', + 'SESSION_LOCKED_HANDLE_LIST_READ', + 'SESSION_LOCKED_HANDLE_LIST_WRITE', 'SESSION_LOCKED_METADATA', 'SESSION_LOCKED_PASS', 'SESSION_LOCKED_SCHEMA', diff --git a/dist/s_stat b/dist/s_stat index 5d5937e1833..6aeeca6faa6 100755 --- a/dist/s_stat +++ b/dist/s_stat @@ -25,9 +25,6 @@ cat << UNUSED_STAT_FIELDS lock_checkpoint_count lock_checkpoint_wait_application lock_checkpoint_wait_internal -lock_handle_list_count -lock_handle_list_wait_application -lock_handle_list_wait_internal lock_metadata_count lock_metadata_wait_application lock_metadata_wait_internal diff --git a/dist/stat_data.py b/dist/stat_data.py index 0af5d6d017e..a4d92345f88 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -288,9 +288,7 @@ 
connection_stats = [ LockStat('lock_checkpoint_count', 'checkpoint lock acquisitions'), LockStat('lock_checkpoint_wait_application', 'checkpoint lock application thread wait time (usecs)'), LockStat('lock_checkpoint_wait_internal', 'checkpoint lock internal thread wait time (usecs)'), - LockStat('lock_handle_list_count', 'handle-list lock acquisitions'), - LockStat('lock_handle_list_wait_application', 'handle-list lock application thread wait time (usecs)'), - LockStat('lock_handle_list_wait_internal', 'handle-list lock internal thread wait time (usecs)'), + LockStat('lock_handle_list_wait_eviction', 'handle-list lock eviction thread wait time (usecs)'), LockStat('lock_metadata_count', 'metadata lock acquisitions'), LockStat('lock_metadata_wait_application', 'metadata lock application thread wait time (usecs)'), LockStat('lock_metadata_wait_internal', 'metadata lock internal thread wait time (usecs)'), diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index b2f4bb04ce4..866b8633f71 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -25,21 +25,19 @@ __conn_dhandle_destroy(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle) } /* - * __conn_dhandle_alloc -- + * __wt_conn_dhandle_alloc -- * Allocate a new data handle and return it linked into the connection's * list. 
*/ -static int -__conn_dhandle_alloc(WT_SESSION_IMPL *session, - const char *uri, const char *checkpoint, WT_DATA_HANDLE **dhandlep) +int +__wt_conn_dhandle_alloc( + WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) { WT_BTREE *btree; WT_DATA_HANDLE *dhandle; WT_DECL_RET; uint64_t bucket; - *dhandlep = NULL; - WT_RET(__wt_calloc_one(session, &dhandle)); __wt_rwlock_init(session, &dhandle->rwlock); @@ -75,7 +73,7 @@ __conn_dhandle_alloc(WT_SESSION_IMPL *session, bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE; WT_CONN_DHANDLE_INSERT(S2C(session), dhandle, bucket); - *dhandlep = dhandle; + session->dhandle = dhandle; return (0); err: __conn_dhandle_destroy(session, dhandle); @@ -122,10 +120,7 @@ __wt_conn_dhandle_find( } } - WT_RET(__conn_dhandle_alloc(session, uri, checkpoint, &dhandle)); - - session->dhandle = dhandle; - return (0); + return (WT_NOTFOUND); } /* @@ -419,12 +414,11 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri, { WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle; + WT_DECL_RET; uint64_t bucket; conn = S2C(session); - WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); - /* * If we're given a URI, then we walk only the hash list for that * name. If we don't have a URI we walk the entire dhandle list. 
@@ -432,29 +426,42 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri, if (uri != NULL) { bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE; - TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) { + + for (dhandle = NULL;;) { + WT_WITH_HANDLE_LIST_READ_LOCK(session, + WT_DHANDLE_NEXT(session, dhandle, + &conn->dhhash[bucket], hashq)); + if (dhandle == NULL) + return (0); + if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) || F_ISSET(dhandle, WT_DHANDLE_DEAD) || dhandle->checkpoint != NULL || strcmp(uri, dhandle->name) != 0) continue; - WT_RET(__conn_btree_apply_internal( - session, dhandle, file_func, name_func, cfg)); + WT_ERR(__conn_btree_apply_internal(session, + dhandle, file_func, name_func, cfg)); } } else { - TAILQ_FOREACH(dhandle, &conn->dhqh, q) { + for (dhandle = NULL;;) { + WT_WITH_HANDLE_LIST_READ_LOCK(session, + WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q)); + if (dhandle == NULL) + return (0); + if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) || F_ISSET(dhandle, WT_DHANDLE_DEAD) || dhandle->checkpoint != NULL || !WT_PREFIX_MATCH(dhandle->name, "file:") || WT_IS_METADATA(dhandle)) continue; - WT_RET(__conn_btree_apply_internal( - session, dhandle, file_func, name_func, cfg)); + WT_ERR(__conn_btree_apply_internal(session, + dhandle, file_func, name_func, cfg)); } } - return (0); +err: WT_DHANDLE_RELEASE(dhandle); + return (ret); } /* @@ -473,7 +480,8 @@ __wt_conn_dhandle_close_all( conn = S2C(session); - WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); + WT_ASSERT(session, + F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); WT_ASSERT(session, session->dhandle == NULL); bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE; @@ -534,7 +542,8 @@ __conn_dhandle_remove(WT_SESSION_IMPL *session, bool final) dhandle = session->dhandle; bucket = dhandle->name_hash % WT_HASH_ARRAY_SIZE; - WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); + WT_ASSERT(session, + F_ISSET(session, 
WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); WT_ASSERT(session, dhandle != conn->cache->evict_file_next); /* Check if the handle was reacquired by a session while we waited. */ @@ -583,7 +592,7 @@ __wt_conn_dhandle_discard_single( } /* Try to remove the handle, protected by the data handle lock. */ - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, tret = __conn_dhandle_remove(session, final)); if (set_pass_intr) (void)__wt_atomic_subv32(&S2C(session)->cache->pass_intr, 1); diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index 54bcfd98aba..4f8d89fa9d2 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -53,7 +53,6 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) /* Spinlocks. */ WT_RET(__wt_spin_init(session, &conn->api_lock, "api")); WT_SPIN_INIT_TRACKED(session, &conn->checkpoint_lock, checkpoint); - WT_SPIN_INIT_TRACKED(session, &conn->dhandle_lock, handle_list); WT_RET(__wt_spin_init(session, &conn->encryptor_lock, "encryptor")); WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list")); WT_RET(__wt_spin_init(session, &conn->las_lock, "lookaside table")); @@ -64,6 +63,7 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_RET(__wt_spin_init(session, &conn->turtle_lock, "turtle file")); /* Read-write locks */ + __wt_rwlock_init(session, &conn->dhandle_lock); __wt_rwlock_init(session, &conn->hot_backup_lock); WT_RET(__wt_calloc_def(session, WT_PAGE_LOCKS, &conn->page_lock)); @@ -134,7 +134,7 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_spin_destroy(session, &conn->api_lock); __wt_spin_destroy(session, &conn->block_lock); __wt_spin_destroy(session, &conn->checkpoint_lock); - __wt_spin_destroy(session, &conn->dhandle_lock); + __wt_rwlock_destroy(session, &conn->dhandle_lock); __wt_spin_destroy(session, &conn->encryptor_lock); __wt_spin_destroy(session, &conn->fh_lock); __wt_rwlock_destroy(session, &conn->hot_backup_lock); diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c index 
31dc9c45992..d89392b66c6 100644 --- a/src/conn/conn_stat.c +++ b/src/conn/conn_stat.c @@ -409,7 +409,6 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp) struct timespec ts; struct tm *tm, _tm; WT_CONNECTION_IMPL *conn; - WT_DECL_RET; WT_FSTREAM *log_stream; conn = S2C(session); @@ -446,12 +445,9 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp) * Lock the schema and walk the list of open handles, dumping * any that match the list of object sources. */ - if (conn->stat_sources != NULL) { - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_conn_btree_apply( + if (conn->stat_sources != NULL) + WT_RET(__wt_conn_btree_apply( session, NULL, __statlog_apply, NULL, NULL)); - WT_RET(ret); - } /* * Walk the list of open LSM trees, dumping any that match the diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index f9b7305c7d8..8c186c63939 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -233,7 +233,7 @@ __sweep_remove_handles(WT_SESSION_IMPL *session) if (!WT_DHANDLE_CAN_DISCARD(dhandle)) continue; - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __sweep_remove_one(session, dhandle)); if (ret == 0) WT_STAT_CONN_INCR(session, dh_sweep_remove); diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c index 08b15e6ca5e..61ced8d11e7 100644 --- a/src/cursor/cur_backup.c +++ b/src/cursor/cur_backup.c @@ -346,13 +346,9 @@ __backup_stop(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb) static int __backup_all(WT_SESSION_IMPL *session) { - WT_DECL_RET; - /* Build a list of the file objects that need to be copied. 
*/ - WT_WITH_HANDLE_LIST_LOCK(session, ret = - __wt_meta_apply_all(session, NULL, __backup_list_uri_append, NULL)); - - return (ret); + return (__wt_meta_apply_all( + session, NULL, __backup_list_uri_append, NULL)); } /* diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index 4a356f7da61..f463e6bc615 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -7,6 +7,12 @@ The WiredTiger Utility can now \c truncate an object. Removing all contents from the specified object. +
Handle list lock statistics
+
+In the 2.9.1 release we added statistics tracking handle list lock timing, we +have switched that lock from a spin lock to a read-write lock, and consequently +changed the statistics tracking lock related wait time. +
@section version_291 Upgrading to Version 2.9.1 diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 48ea1ccb02b..de1cff85816 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -24,40 +24,40 @@ static int __evict_walk_file( (S2C(s)->evict_threads.current_threads > 1) /* - * __evict_lock_dhandle -- - * Try to get the dhandle lock, with yield and sleep back off. + * __evict_lock_handle_list -- + * Try to get the handle list lock, with yield and sleep back off. * Keep timing statistics overall. */ static int -__evict_lock_dhandle(WT_SESSION_IMPL *session) +__evict_lock_handle_list(WT_SESSION_IMPL *session) { struct timespec enter, leave; WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; - WT_SPINLOCK *dh_lock; - int64_t **stats; + WT_RWLOCK *dh_lock; u_int spins; bool dh_stats; conn = S2C(session); cache = conn->cache; dh_lock = &conn->dhandle_lock; - stats = (int64_t **)conn->stats; - dh_stats = WT_STAT_ENABLED(session) && dh_lock->stat_count_off != -1; /* - * Maintain lock acquisition timing statistics as if this were a - * regular lock acquisition. + * Setup tracking of handle lock acquisition wait time if statistics + * are enabled. */ + dh_stats = WT_STAT_ENABLED(session); + if (dh_stats) __wt_epoch(session, &enter); + /* * Use a custom lock acquisition back off loop so the eviction server * notices any interrupt quickly. 
*/ for (spins = 0; - (ret = __wt_spin_trylock_track(session, dh_lock)) == EBUSY && + (ret = __wt_try_readlock(session, dh_lock)) == EBUSY && cache->pass_intr == 0; spins++) { if (spins < WT_THOUSAND) __wt_yield(); @@ -70,8 +70,9 @@ __evict_lock_dhandle(WT_SESSION_IMPL *session) WT_RET(ret); if (dh_stats) { __wt_epoch(session, &leave); - stats[session->stat_bucket][dh_lock->stat_int_usecs_off] += - (int64_t)WT_TIMEDIFF_US(leave, enter); + WT_STAT_CONN_INCRV( + session, lock_handle_list_wait_eviction, + (int64_t)WT_TIMEDIFF_US(leave, enter)); } return (0); } @@ -379,18 +380,17 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work) * otherwise we can block applications evicting large pages. */ if (!__wt_cache_stuck(session)) { - /* - * If we gave up acquiring the lock, that indicates a - * session is waiting for us to clear walks. Do that - * as part of a normal pass (without the handle list + * Try to get the handle list lock: if we give up, that + * indicates a session is waiting for us to clear walks. Do + * that as part of a normal pass (without the handle list * lock) to avoid deadlock. */ - if ((ret = __evict_lock_dhandle(session)) == EBUSY) + if ((ret = __evict_lock_handle_list(session)) == EBUSY) return (0); WT_RET(ret); ret = __evict_clear_all_walks(session); - __wt_spin_unlock(session, &conn->dhandle_lock); + __wt_readunlock(session, &conn->dhandle_lock); WT_RET(ret); cache->pages_evicted = 0; @@ -1321,7 +1321,7 @@ retry: while (slot < max_entries) { * reference count to keep it alive while we sweep. 
*/ if (!dhandle_locked) { - WT_ERR(__evict_lock_dhandle(session)); + WT_ERR(__evict_lock_handle_list(session)); dhandle_locked = true; } @@ -1400,7 +1400,7 @@ retry: while (slot < max_entries) { (void)__wt_atomic_addi32(&dhandle->session_inuse, 1); incr = true; - __wt_spin_unlock(session, &conn->dhandle_lock); + __wt_readunlock(session, &conn->dhandle_lock); dhandle_locked = false; /* @@ -1447,7 +1447,7 @@ retry: while (slot < max_entries) { } err: if (dhandle_locked) { - __wt_spin_unlock(session, &conn->dhandle_lock); + __wt_readunlock(session, &conn->dhandle_lock); dhandle_locked = false; } @@ -2319,8 +2319,11 @@ __wt_verbose_dump_cache(WT_SESSION_IMPL *session) WT_RET(__wt_msg(session, "%s", WT_DIVIDER)); WT_RET(__wt_msg(session, "cache dump")); - __wt_spin_lock(session, &conn->dhandle_lock); - TAILQ_FOREACH(dhandle, &conn->dhqh, q) { + for (dhandle = NULL;;) { + WT_WITH_HANDLE_LIST_READ_LOCK(session, + WT_DHANDLE_NEXT(session, dhandle, &conn->dhqh, q)); + if (dhandle == NULL) + break; if (!WT_PREFIX_MATCH(dhandle->name, "file:") || !F_ISSET(dhandle, WT_DHANDLE_OPEN)) continue; @@ -2331,7 +2334,6 @@ __wt_verbose_dump_cache(WT_SESSION_IMPL *session) if (ret != 0) break; } - __wt_spin_unlock(session, &conn->dhandle_lock); WT_RET(ret); /* diff --git a/src/evict/evict_stat.c b/src/evict/evict_stat.c index 2dd3b1e83a0..7c2d5722a63 100644 --- a/src/evict/evict_stat.c +++ b/src/evict/evict_stat.c @@ -134,5 +134,5 @@ __wt_curstat_cache_walk(WT_SESSION_IMPL *session) WT_STAT_DATA_SET(session, cache_state_root_size, btree->root.page->memory_footprint); - WT_WITH_HANDLE_LIST_LOCK(session, __evict_stat_walk(session)); + __evict_stat_walk(session); } diff --git a/src/include/cache.i b/src/include/cache.i index 17ab39e97d2..d71978ccf35 100644 --- a/src/include/cache.i +++ b/src/include/cache.i @@ -364,7 +364,7 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool *didworkp) * block eviction), we don't want to highjack the thread for eviction. 
*/ if (F_ISSET(session, WT_SESSION_NO_EVICTION | - WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_LOCKED_SCHEMA)) + WT_SESSION_LOCKED_HANDLE_LIST_WRITE | WT_SESSION_LOCKED_SCHEMA)) return (0); /* In memory configurations don't block when the cache is full. */ diff --git a/src/include/connection.h b/src/include/connection.h index 64ac4271db1..3a719e59608 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -123,12 +123,16 @@ struct __wt_named_extractor { * main queue and the hashed queue. */ #define WT_CONN_DHANDLE_INSERT(conn, dhandle, bucket) do { \ + WT_ASSERT(session, \ + F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \ TAILQ_INSERT_HEAD(&(conn)->dhqh, dhandle, q); \ TAILQ_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashq); \ ++conn->dhandle_count; \ } while (0) #define WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket) do { \ + WT_ASSERT(session, \ + F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \ TAILQ_REMOVE(&(conn)->dhqh, dhandle, q); \ TAILQ_REMOVE(&(conn)->dhhash[bucket], dhandle, hashq); \ --conn->dhandle_count; \ @@ -163,13 +167,13 @@ struct __wt_connection_impl { WT_SPINLOCK api_lock; /* Connection API spinlock */ WT_SPINLOCK checkpoint_lock; /* Checkpoint spinlock */ - WT_SPINLOCK dhandle_lock; /* Data handle list spinlock */ WT_SPINLOCK fh_lock; /* File handle queue spinlock */ WT_SPINLOCK metadata_lock; /* Metadata update spinlock */ WT_SPINLOCK reconfig_lock; /* Single thread reconfigure */ WT_SPINLOCK schema_lock; /* Schema operation spinlock */ WT_SPINLOCK table_lock; /* Table creation spinlock */ WT_SPINLOCK turtle_lock; /* Turtle file spinlock */ + WT_RWLOCK dhandle_lock; /* Data handle list lock */ /* * We distribute the btree page locks across a set of spin locks. 
Don't diff --git a/src/include/dhandle.h b/src/include/dhandle.h index dcc788f0839..4f318e7bccf 100644 --- a/src/include/dhandle.h +++ b/src/include/dhandle.h @@ -37,6 +37,24 @@ #define WT_SESSION_META_DHANDLE(s) \ (((WT_CURSOR_BTREE *)((s)->meta_cursor))->btree->dhandle) +#define WT_DHANDLE_ACQUIRE(dhandle) \ + (void)__wt_atomic_add32(&dhandle->session_ref, 1) + +#define WT_DHANDLE_RELEASE(dhandle) \ + (void)__wt_atomic_sub32(&dhandle->session_ref, 1) + +#define WT_DHANDLE_NEXT(session, dhandle, head, field) do { \ + WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));\ + if (dhandle == NULL) \ + dhandle = TAILQ_FIRST(head); \ + else { \ + WT_DHANDLE_RELEASE(dhandle); \ + dhandle = TAILQ_NEXT(dhandle, field); \ + } \ + if (dhandle != NULL) \ + WT_DHANDLE_ACQUIRE(dhandle); \ +} while (0) + /* * WT_DATA_HANDLE -- * A handle for a generic named data source. diff --git a/src/include/extern.h b/src/include/extern.h index eb2f9a0e784..d7d58c58048 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -254,6 +254,7 @@ extern WT_THREAD_RET __wt_cache_pool_server(void *arg) WT_GCC_FUNC_DECL_ATTRIBUT extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_conn_dhandle_alloc( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_conn_dhandle_find( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_conn_btree_open( WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/flags.h b/src/include/flags.h index 0b92a12c686..5219bf33ed6 100644 --- a/src/include/flags.h +++ b/src/include/flags.h @@ -53,22 +53,23 @@ #define WT_SESSION_CAN_WAIT 0x00000001 #define WT_SESSION_INTERNAL 0x00000002 #define WT_SESSION_LOCKED_CHECKPOINT 0x00000004 -#define WT_SESSION_LOCKED_HANDLE_LIST 0x00000008 -#define WT_SESSION_LOCKED_METADATA 0x00000010 -#define WT_SESSION_LOCKED_PASS 0x00000020 -#define WT_SESSION_LOCKED_SCHEMA 0x00000040 -#define WT_SESSION_LOCKED_SLOT 0x00000080 -#define WT_SESSION_LOCKED_TABLE 0x00000100 -#define WT_SESSION_LOCKED_TURTLE 0x00000200 -#define WT_SESSION_LOGGING_INMEM 0x00000400 -#define WT_SESSION_LOOKASIDE_CURSOR 0x00000800 -#define WT_SESSION_NO_CACHE 0x00001000 -#define WT_SESSION_NO_DATA_HANDLES 0x00002000 -#define WT_SESSION_NO_EVICTION 0x00004000 -#define WT_SESSION_NO_LOGGING 0x00008000 -#define WT_SESSION_NO_SCHEMA_LOCK 0x00010000 -#define WT_SESSION_QUIET_CORRUPT_FILE 0x00020000 -#define WT_SESSION_SERVER_ASYNC 0x00040000 +#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000008 +#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000010 +#define WT_SESSION_LOCKED_METADATA 0x00000020 +#define WT_SESSION_LOCKED_PASS 0x00000040 +#define WT_SESSION_LOCKED_SCHEMA 0x00000080 +#define WT_SESSION_LOCKED_SLOT 0x00000100 +#define WT_SESSION_LOCKED_TABLE 0x00000200 +#define WT_SESSION_LOCKED_TURTLE 0x00000400 +#define WT_SESSION_LOGGING_INMEM 0x00000800 +#define WT_SESSION_LOOKASIDE_CURSOR 0x00001000 +#define WT_SESSION_NO_CACHE 0x00002000 +#define WT_SESSION_NO_DATA_HANDLES 
0x00004000 +#define WT_SESSION_NO_EVICTION 0x00008000 +#define WT_SESSION_NO_LOGGING 0x00010000 +#define WT_SESSION_NO_SCHEMA_LOCK 0x00020000 +#define WT_SESSION_QUIET_CORRUPT_FILE 0x00040000 +#define WT_SESSION_SERVER_ASYNC 0x00080000 #define WT_STAT_CLEAR 0x00000001 #define WT_STAT_JSON 0x00000002 #define WT_STAT_ON_CLOSE 0x00000004 diff --git a/src/include/schema.h b/src/include/schema.h index bb116e5cf2f..fff57951c0e 100644 --- a/src/include/schema.h +++ b/src/include/schema.h @@ -78,6 +78,11 @@ struct __wt_table { */ #define WT_COLGROUPS(t) WT_MAX((t)->ncolgroups, 1) +/* Make it simple to check a generic locked state on the handle list lock */ +#define WT_SESSION_LOCKED_HANDLE_LIST \ + (WT_SESSION_LOCKED_HANDLE_LIST_READ | \ + WT_SESSION_LOCKED_HANDLE_LIST_WRITE) + /* * WT_WITH_LOCK_WAIT -- * Wait for a lock, perform an operation, drop the lock. @@ -122,16 +127,47 @@ struct __wt_table { &S2C(session)->checkpoint_lock, WT_SESSION_LOCKED_CHECKPOINT, op) /* - * WT_WITH_HANDLE_LIST_LOCK -- - * Acquire the data handle list lock, perform an operation, drop the lock. + * WT_WITH_HANDLE_LIST_READ_LOCK -- + * Acquire the data handle list lock in shared mode, perform an operation, + * drop the lock. The handle list lock is a read-write lock so the + * implementation is different to the other lock macros. * * Note: always waits because some operations need the handle list lock to * discard handles, and we only expect it to be held across short * operations. 
*/ -#define WT_WITH_HANDLE_LIST_LOCK(session, op) \ - WT_WITH_LOCK_WAIT(session, \ - &S2C(session)->dhandle_lock, WT_SESSION_LOCKED_HANDLE_LIST, op) +#define WT_WITH_HANDLE_LIST_READ_LOCK(session, op) do { \ + if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) { \ + op; \ + } else { \ + __wt_readlock(session, &S2C(session)->dhandle_lock); \ + F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ + __wt_readunlock(session, &S2C(session)->dhandle_lock); \ + } \ +} while (0) + +/* + * WT_WITH_HANDLE_LIST_WRITE_LOCK -- + * Acquire the data handle list lock in exclusive mode, perform an operation, + * drop the lock. The handle list lock is a read-write lock so the + * implementation is different to the other lock macros. + * The read lock must not already be held: there is no upgrade from it. + */ +#define WT_WITH_HANDLE_LIST_WRITE_LOCK(session, op) do { \ + if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)) { \ + op; \ + } else { \ + WT_ASSERT(session, \ + !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ));\ + __wt_writelock(session, &S2C(session)->dhandle_lock); \ + F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ + __wt_writeunlock(session, &S2C(session)->dhandle_lock); \ + } \ +} while (0) /* * WT_WITH_METADATA_LOCK -- @@ -192,15 +228,21 @@ struct __wt_table { WT_CONNECTION_IMPL *__conn = S2C(session); \ bool __checkpoint_locked = \ F_ISSET(session, WT_SESSION_LOCKED_CHECKPOINT); \ - bool __handle_locked = \ - F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST); \ + bool __handle_read_locked = \ + F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ + bool __handle_write_locked = \ + F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ bool __table_locked = \ F_ISSET(session, WT_SESSION_LOCKED_TABLE); \ bool __schema_locked = \ F_ISSET(session, WT_SESSION_LOCKED_SCHEMA); \ - if (__handle_locked) { \ - F_CLR(session,
WT_SESSION_LOCKED_HANDLE_LIST); \ - __wt_spin_unlock(session, &__conn->dhandle_lock); \ + if (__handle_read_locked) { \ + F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ + __wt_readunlock(session, &__conn->dhandle_lock); \ + } \ + if (__handle_write_locked) { \ + F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ + __wt_writeunlock(session, &__conn->dhandle_lock); \ } \ if (__table_locked) { \ F_CLR(session, WT_SESSION_LOCKED_TABLE); \ @@ -227,8 +269,12 @@ struct __wt_table { __wt_spin_lock(session, &__conn->table_lock); \ F_SET(session, WT_SESSION_LOCKED_TABLE); \ } \ - if (__handle_locked) { \ - __wt_spin_lock(session, &__conn->dhandle_lock); \ - F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST); \ + if (__handle_read_locked) { \ + __wt_readlock(session, &__conn->dhandle_lock); \ + F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ + } \ + if (__handle_write_locked) { \ + __wt_writelock(session, &__conn->dhandle_lock); \ + F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ } \ } while (0) diff --git a/src/include/stat.h b/src/include/stat.h index fd3e3290d95..8b2e78a4ed5 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -392,9 +392,7 @@ struct __wt_connection_stats { int64_t lock_checkpoint_count; int64_t lock_checkpoint_wait_application; int64_t lock_checkpoint_wait_internal; - int64_t lock_handle_list_count; - int64_t lock_handle_list_wait_application; - int64_t lock_handle_list_wait_internal; + int64_t lock_handle_list_wait_eviction; int64_t lock_metadata_count; int64_t lock_metadata_wait_application; int64_t lock_metadata_wait_internal; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index f05d3d4ab55..d1e3d383396 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -4595,240 +4595,236 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1133 /*! 
lock: checkpoint lock internal thread wait time (usecs) */ #define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1134 -/*! lock: handle-list lock acquisitions */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_COUNT 1135 -/*! lock: handle-list lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_APPLICATION 1136 -/*! lock: handle-list lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_INTERNAL 1137 +/*! lock: handle-list lock eviction thread wait time (usecs) */ +#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1135 /*! lock: metadata lock acquisitions */ -#define WT_STAT_CONN_LOCK_METADATA_COUNT 1138 +#define WT_STAT_CONN_LOCK_METADATA_COUNT 1136 /*! lock: metadata lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1139 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1137 /*! lock: metadata lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1140 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1138 /*! lock: schema lock acquisitions */ -#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1141 +#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1139 /*! lock: schema lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1142 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1140 /*! lock: schema lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1143 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1141 /*! lock: table lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_COUNT 1144 +#define WT_STAT_CONN_LOCK_TABLE_COUNT 1142 /*! * lock: table lock application thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1145 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1143 /*! 
* lock: table lock internal thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1146 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1144 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1147 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1145 /*! log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1148 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1146 /*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1149 +#define WT_STAT_CONN_LOG_SLOT_RACES 1147 /*! log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1150 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1148 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1151 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1149 /*! log: consolidated slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1152 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1150 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1153 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1151 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1154 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1152 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1155 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1153 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1156 +#define WT_STAT_CONN_LOG_FLUSH 1154 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1157 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1155 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1158 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1156 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1159 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1157 /*! 
log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1160 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1158 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1161 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1159 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1162 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1160 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1163 +#define WT_STAT_CONN_LOG_SCANS 1161 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1164 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1162 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1165 +#define WT_STAT_CONN_LOG_WRITE_LSN 1163 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1166 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1164 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1167 +#define WT_STAT_CONN_LOG_SYNC 1165 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1168 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1166 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1169 +#define WT_STAT_CONN_LOG_SYNC_DIR 1167 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1170 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1168 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1171 +#define WT_STAT_CONN_LOG_WRITES 1169 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1172 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1170 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1173 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1171 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1174 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1172 /*! 
log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1175 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1173 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1176 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1174 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1177 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1175 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1178 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1176 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1179 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1177 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1180 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1178 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1181 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1179 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1182 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1180 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1183 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1181 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1184 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1182 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1185 +#define WT_STAT_CONN_REC_PAGES 1183 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1186 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1184 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1187 +#define WT_STAT_CONN_REC_PAGE_DELETE 1185 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1188 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1186 /*! 
reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1189 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1187 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1190 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1188 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1191 +#define WT_STAT_CONN_SESSION_OPEN 1189 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1192 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1190 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1193 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1191 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1194 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1192 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1195 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1193 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1196 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1194 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1197 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1195 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1198 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1196 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1199 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1197 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1200 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1198 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1201 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1199 /*! 
session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1202 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1200 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1203 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1201 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1204 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1202 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1205 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1203 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1206 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1204 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1207 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1205 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1208 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1206 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1209 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1207 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1210 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1208 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1211 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1209 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1212 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1210 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1213 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1211 /*! 
thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1214 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1212 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1215 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1213 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1216 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1214 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1217 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1215 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1218 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1216 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1219 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1217 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1220 +#define WT_STAT_CONN_PAGE_SLEEP 1218 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1221 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1219 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1222 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1220 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1223 +#define WT_STAT_CONN_TXN_BEGIN 1221 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1224 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1222 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1225 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1223 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1226 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1224 /*! 
transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1227 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1225 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1228 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1226 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1229 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1227 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1230 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1228 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1231 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1229 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1232 +#define WT_STAT_CONN_TXN_CHECKPOINT 1230 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1233 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1231 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1234 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1232 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1235 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1233 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1236 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1234 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1237 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1235 /*! 
transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1238 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1236 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1239 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1237 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1240 +#define WT_STAT_CONN_TXN_SYNC 1238 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1241 +#define WT_STAT_CONN_TXN_COMMIT 1239 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1242 +#define WT_STAT_CONN_TXN_ROLLBACK 1240 /*! * @} diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index a2511f48e2b..60afbc99ade 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -1692,8 +1692,8 @@ __wt_clsm_open(WT_SESSION_IMPL *session, bulk = cval.val != 0; /* Get the LSM tree. */ - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, uri, bulk, &lsm_tree)); + ret = __wt_lsm_tree_get(session, uri, bulk, &lsm_tree); + /* * Check whether the exclusive open for a bulk load succeeded, and * if it did ensure that it's safe to bulk load into the tree. 
diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index cbd83a5cd30..6dc06146179 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -387,8 +387,8 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) __wt_sleep(0, 10000); if (TAILQ_EMPTY(&conn->lsmqh)) continue; - __wt_spin_lock(session, &conn->dhandle_lock); - F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST); + __wt_readlock(session, &conn->dhandle_lock); + F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); dhandle_locked = true; TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) { if (!lsm_tree->active) @@ -448,14 +448,14 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) session, WT_LSM_WORK_MERGE, 0, lsm_tree)); } } - __wt_spin_unlock(session, &conn->dhandle_lock); - F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST); + __wt_readunlock(session, &conn->dhandle_lock); + F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); dhandle_locked = false; } err: if (dhandle_locked) { - __wt_spin_unlock(session, &conn->dhandle_lock); - F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST); + __wt_readunlock(session, &conn->dhandle_lock); + F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); } return (ret); } diff --git a/src/lsm/lsm_stat.c b/src/lsm/lsm_stat.c index 150de968722..21e8991be94 100644 --- a/src/lsm/lsm_stat.c +++ b/src/lsm/lsm_stat.c @@ -33,9 +33,7 @@ __curstat_lsm_init( "checkpoint=" WT_CHECKPOINT, NULL, NULL }; locked = false; - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree)); - WT_RET(ret); + WT_RET(__wt_lsm_tree_get(session, uri, false, &lsm_tree)); WT_ERR(__wt_scr_alloc(session, 0, &uribuf)); /* Propagate all, fast and/or clear to the cursors we open. */ diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 71a981a6284..a9275976023 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -38,7 +38,7 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final) /* We may be destroying an lsm_tree before it was added. 
*/ if (F_ISSET(lsm_tree, WT_LSM_TREE_OPEN)) { WT_ASSERT(session, final || - F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); + F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); TAILQ_REMOVE(&S2C(session)->lsmqh, lsm_tree, q); } @@ -321,9 +321,7 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session, metadata = NULL; /* If the tree can be opened, it already exists. */ - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree)); - if (ret == 0) { + if ((ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree)) == 0) { __wt_lsm_tree_release(session, lsm_tree); return (exclusive ? EEXIST : 0); } @@ -339,7 +337,7 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session, * error: the returned handle is NULL on error, and the metadata * tracking macros handle cleaning up on failure. */ - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __lsm_tree_open(session, uri, true, &lsm_tree)); if (ret == 0) __wt_lsm_tree_release(session, lsm_tree); @@ -404,6 +402,9 @@ __lsm_tree_find(WT_SESSION_IMPL *session, } *treep = lsm_tree; + + WT_ASSERT(session, lsm_tree->excl_session == + (exclusive ? session : NULL)); return (0); } @@ -456,7 +457,8 @@ __lsm_tree_open(WT_SESSION_IMPL *session, conn = S2C(session); lsm_tree = NULL; - WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); + WT_ASSERT(session, + F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); /* Start the LSM manager thread if it isn't running. */ if (__wt_atomic_cas32(&conn->lsm_manager.lsm_workers, 0, 1)) @@ -520,14 +522,21 @@ __wt_lsm_tree_get(WT_SESSION_IMPL *session, { WT_DECL_RET; - WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); - - ret = __lsm_tree_find(session, uri, exclusive, treep); + /* + * Dropping and re-acquiring the lock is safe here, since the tree open + * call checks to see if another thread beat it to opening the tree + * before proceeding. 
+ */ + if (exclusive) + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, + ret = __lsm_tree_find(session, uri, exclusive, treep)); + else + WT_WITH_HANDLE_LIST_READ_LOCK(session, + ret = __lsm_tree_find(session, uri, exclusive, treep)); if (ret == WT_NOTFOUND) - ret = __lsm_tree_open(session, uri, exclusive, treep); + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, + ret = __lsm_tree_open(session, uri, exclusive, treep)); - WT_ASSERT(session, ret != 0 || - (*treep)->excl_session == (exclusive ? session : NULL)); return (ret); } @@ -857,9 +866,7 @@ __wt_lsm_tree_alter( locked = false; /* Get the LSM tree. */ - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree)); - WT_RET(ret); + WT_RET(__wt_lsm_tree_get(session, uri, false, &lsm_tree)); /* Prevent any new opens. */ __wt_lsm_tree_writelock(session, lsm_tree); @@ -899,9 +906,7 @@ __wt_lsm_tree_drop( locked = false; /* Get the LSM tree. */ - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, name, true, &lsm_tree)); - WT_RET(ret); + WT_RET(__wt_lsm_tree_get(session, name, true, &lsm_tree)); WT_ASSERT(session, !lsm_tree->active); /* Prevent any new opens. */ @@ -934,7 +939,7 @@ __wt_lsm_tree_drop( WT_ASSERT(session, !lsm_tree->active); err: if (locked) __wt_lsm_tree_writeunlock(session, lsm_tree); - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, tret = __lsm_tree_discard(session, lsm_tree, false)); WT_TRET(tret); return (ret); @@ -960,9 +965,7 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session, locked = false; /* Get the LSM tree. */ - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, olduri, true, &lsm_tree)); - WT_RET(ret); + WT_RET(__wt_lsm_tree_get(session, olduri, true, &lsm_tree)); /* Prevent any new opens. */ __wt_lsm_tree_writelock(session, lsm_tree); @@ -1007,7 +1010,7 @@ err: if (locked) * Discard this LSM tree structure. The first operation on the renamed * tree will create a new one. 
*/ - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, tret = __lsm_tree_discard(session, lsm_tree, false)); WT_TRET(tret); return (ret); @@ -1032,9 +1035,7 @@ __wt_lsm_tree_truncate( locked = false; /* Get the LSM tree. */ - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, name, true, &lsm_tree)); - WT_RET(ret); + WT_RET(__wt_lsm_tree_get(session, name, true, &lsm_tree)); /* Prevent any new opens. */ __wt_lsm_tree_writelock(session, lsm_tree); @@ -1068,7 +1069,7 @@ err: if (locked) * the last good version of the metadata will be used, resulting * in a valid (not truncated) tree. */ - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, tret = __lsm_tree_discard(session, lsm_tree, false)); WT_TRET(tret); } @@ -1157,9 +1158,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) /* Tell __wt_schema_worker not to look inside the LSM tree. */ *skipp = true; - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, name, false, &lsm_tree)); - WT_RET(ret); + WT_RET(__wt_lsm_tree_get(session, name, false, &lsm_tree)); if (!F_ISSET(S2C(session), WT_CONN_LSM_MERGE)) WT_ERR_MSG(session, EINVAL, @@ -1356,9 +1355,7 @@ __wt_lsm_tree_worker(WT_SESSION_IMPL *session, locked = false; exclusive = FLD_ISSET(open_flags, WT_DHANDLE_EXCLUSIVE); - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_lsm_tree_get(session, uri, exclusive, &lsm_tree)); - WT_RET(ret); + WT_RET(__wt_lsm_tree_get(session, uri, exclusive, &lsm_tree)); /* * We mark that we're busy using the tree to coordinate diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index d9c185a3f58..4349acf7b55 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -276,7 +276,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && !F_ISSET(chunk, WT_LSM_CHUNK_STABLE) && !chunk->evicted) { - WT_WITH_HANDLE_LIST_LOCK(session, + 
WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __lsm_discard_handle(session, chunk->uri, NULL)); if (ret == 0) chunk->evicted = 1; @@ -517,7 +517,7 @@ __lsm_drop_file(WT_SESSION_IMPL *session, const char *uri) * * This will fail with EBUSY if the file is still in use. */ - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __lsm_discard_handle(session, uri, WT_CHECKPOINT)); WT_RET(ret); diff --git a/src/schema/schema_drop.c b/src/schema/schema_drop.c index c1a4f257648..49801e4e5f9 100644 --- a/src/schema/schema_drop.c +++ b/src/schema/schema_drop.c @@ -30,7 +30,7 @@ __drop_file( WT_RET(__wt_schema_backup_check(session, filename)); /* Close all btree handles associated with this file. */ - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __wt_conn_dhandle_close_all(session, uri, force)); WT_RET(ret); diff --git a/src/schema/schema_rename.c b/src/schema/schema_rename.c index f512482c162..a374f4c2831 100644 --- a/src/schema/schema_rename.c +++ b/src/schema/schema_rename.c @@ -33,7 +33,7 @@ __rename_file( WT_RET(__wt_schema_backup_check(session, filename)); WT_RET(__wt_schema_backup_check(session, newfile)); /* Close any btree handles in the file. */ - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __wt_conn_dhandle_close_all(session, uri, false)); WT_ERR(ret); diff --git a/src/schema/schema_worker.c b/src/schema/schema_worker.c index fb7f8cec074..e5f71b5d56f 100644 --- a/src/schema/schema_worker.c +++ b/src/schema/schema_worker.c @@ -49,7 +49,7 @@ __wt_schema_worker(WT_SESSION_IMPL *session, * any open file handles, including checkpoints. 
*/ if (FLD_ISSET(open_flags, WT_DHANDLE_EXCLUSIVE)) { - WT_WITH_HANDLE_LIST_LOCK(session, + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, ret = __wt_conn_dhandle_close_all( session, uri, false)); WT_ERR(ret); diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c index f1251794b89..ee9bddbfc19 100644 --- a/src/session/session_dhandle.c +++ b/src/session/session_dhandle.c @@ -44,8 +44,7 @@ __session_discard_dhandle( TAILQ_REMOVE(&session->dhandles, dhandle_cache, q); TAILQ_REMOVE(&session->dhhash[bucket], dhandle_cache, hashq); - (void)__wt_atomic_sub32(&dhandle_cache->dhandle->session_ref, 1); - + WT_DHANDLE_RELEASE(dhandle_cache->dhandle); __wt_overwrite_and_free(session, dhandle_cache); } @@ -412,17 +411,27 @@ __session_dhandle_sweep(WT_SESSION_IMPL *session) /* * __session_find_shared_dhandle -- * Search for a data handle in the connection and add it to a session's - * cache. Since the data handle isn't locked, this must be called holding - * the handle list lock, and we must increment the handle's reference - * count before releasing it. + * cache. We must increment the handle's reference count while holding + * the handle list lock. */ static int __session_find_shared_dhandle( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) { - WT_RET(__wt_conn_dhandle_find(session, uri, checkpoint)); - (void)__wt_atomic_add32(&session->dhandle->session_ref, 1); - return (0); + WT_DECL_RET; + + WT_WITH_HANDLE_LIST_READ_LOCK(session, + if ((ret = __wt_conn_dhandle_find(session, uri, checkpoint)) == 0) + WT_DHANDLE_ACQUIRE(session->dhandle)); + + if (ret != WT_NOTFOUND) + return (ret); + + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, + if ((ret = __wt_conn_dhandle_alloc(session, uri, checkpoint)) == 0) + WT_DHANDLE_ACQUIRE(session->dhandle)); + + return (ret); } /* @@ -450,16 +459,16 @@ __session_get_dhandle( * We didn't find a match in the session cache, search the shared * handle list and cache the handle we find. 
*/ - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __session_find_shared_dhandle(session, uri, checkpoint)); - WT_RET(ret); + WT_RET(__session_find_shared_dhandle(session, uri, checkpoint)); /* * Fixup the reference count on failure (we incremented the reference * count while holding the handle-list lock). */ - if ((ret = __session_add_dhandle(session)) != 0) - (void)__wt_atomic_sub32(&session->dhandle->session_ref, 1); + if ((ret = __session_add_dhandle(session)) != 0) { + WT_DHANDLE_RELEASE(session->dhandle); + session->dhandle = NULL; + } return (ret); } @@ -505,17 +514,15 @@ __wt_session_get_btree(WT_SESSION_IMPL *session, * reopen handles in the meantime. A combination of the schema * and handle list locks are used to enforce this. */ - if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA) || - !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) { + if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) { dhandle->excl_session = NULL; dhandle->excl_ref = 0; F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE); __wt_writeunlock(session, &dhandle->rwlock); WT_WITH_SCHEMA_LOCK(session, - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __wt_session_get_btree( - session, uri, checkpoint, cfg, flags))); + ret = __wt_session_get_btree( + session, uri, checkpoint, cfg, flags)); return (ret); } diff --git a/src/support/stat.c b/src/support/stat.c index 167d17137ce..fd38e1b79ee 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -759,9 +759,7 @@ static const char * const __stats_connection_desc[] = { "lock: checkpoint lock acquisitions", "lock: checkpoint lock application thread wait time (usecs)", "lock: checkpoint lock internal thread wait time (usecs)", - "lock: handle-list lock acquisitions", - "lock: handle-list lock application thread wait time (usecs)", - "lock: handle-list lock internal thread wait time (usecs)", + "lock: handle-list lock eviction thread wait time (usecs)", "lock: metadata lock acquisitions", "lock: metadata lock application thread wait time (usecs)", "lock: metadata lock 
internal thread wait time (usecs)", @@ -1044,9 +1042,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->lock_checkpoint_count = 0; stats->lock_checkpoint_wait_application = 0; stats->lock_checkpoint_wait_internal = 0; - stats->lock_handle_list_count = 0; - stats->lock_handle_list_wait_application = 0; - stats->lock_handle_list_wait_internal = 0; + stats->lock_handle_list_wait_eviction = 0; stats->lock_metadata_count = 0; stats->lock_metadata_wait_application = 0; stats->lock_metadata_wait_internal = 0; @@ -1351,12 +1347,8 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, lock_checkpoint_wait_application); to->lock_checkpoint_wait_internal += WT_STAT_READ(from, lock_checkpoint_wait_internal); - to->lock_handle_list_count += - WT_STAT_READ(from, lock_handle_list_count); - to->lock_handle_list_wait_application += - WT_STAT_READ(from, lock_handle_list_wait_application); - to->lock_handle_list_wait_internal += - WT_STAT_READ(from, lock_handle_list_wait_internal); + to->lock_handle_list_wait_eviction += + WT_STAT_READ(from, lock_handle_list_wait_eviction); to->lock_metadata_count += WT_STAT_READ(from, lock_metadata_count); to->lock_metadata_wait_application += WT_STAT_READ(from, lock_metadata_wait_application); diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 3b19162fd3d..7b33b0c7788 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -640,9 +640,8 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_ASSERT(session, session->ckpt_handle_next == 0); WT_WITH_SCHEMA_LOCK(session, WT_WITH_TABLE_LOCK(session, - WT_WITH_HANDLE_LIST_LOCK(session, - ret = __checkpoint_apply_all( - session, cfg, __wt_checkpoint_get_handles, NULL)))); + ret = __checkpoint_apply_all( + session, cfg, __wt_checkpoint_get_handles, NULL))); WT_ERR(ret); /* -- cgit v1.2.1 From 0562f92104f0b2d8ef218d9fe465ef718bc2d9cd Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Thu, 2 Feb 2017 16:40:30 +1100 Subject: WT-3150 Reduce impact of checkpoints 
on eviction. (#3265) In particular, don't have the eviction server give up all walks each time it is interrupted, and only wait for requesting threads to make progress: don't go to sleep. --- src/evict/evict_lru.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index de1cff85816..3cb513fd87b 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -281,7 +281,7 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; - bool did_work; + bool did_work, was_intr; conn = S2C(session); cache = conn->cache; @@ -309,8 +309,21 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) ret = __evict_server(session, &did_work); F_CLR(cache->walk_session, WT_SESSION_LOCKED_PASS); F_CLR(session, WT_SESSION_LOCKED_PASS); + was_intr = cache->pass_intr != 0; __wt_spin_unlock(session, &cache->evict_pass_lock); WT_ERR(ret); + + /* + * If the eviction server was interrupted, wait until + * requests have been processed: the system may + * otherwise be busy so don't go to sleep. + */ + if (was_intr) { + while (cache->pass_intr != 0) + __wt_yield(); + continue; + } + __wt_verbose(session, WT_VERB_EVICTSERVER, "sleeping"); /* Don't rely on signals: check periodically. */ @@ -372,7 +385,8 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work) /* Evict pages from the cache as needed. */ WT_RET(__evict_pass(session)); - if (!F_ISSET(conn, WT_CONN_EVICTION_RUN)) + if (!F_ISSET(conn, WT_CONN_EVICTION_RUN) || + cache->pass_intr != 0) return (0); /* -- cgit v1.2.1 From 3e68fb2d7da35eeb122308971f02203c58caa538 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Fri, 3 Feb 2017 03:28:50 +1100 Subject: WT-3139 Enhance wtperf to support periodic table scans (#3268) * Enhance wtperf to support periodic table scans * Implement scans as read_range. * Use a random cursor to set key in table properly. 
* Don't allow insert workload with table specifier. * Reset the rand cursor so it isn't positioned. * Make wtperf pre_load_data an option. --- bench/wtperf/config.c | 42 ++++++- bench/wtperf/idle_table_cycle.c | 2 + bench/wtperf/stress/btree-split-stress.wtperf | 3 +- bench/wtperf/wtperf.c | 163 ++++++++++++++++++++------ bench/wtperf/wtperf.h | 5 + bench/wtperf/wtperf_opt.i | 10 +- src/docs/wtperf.dox | 6 +- 7 files changed, 183 insertions(+), 48 deletions(-) diff --git a/bench/wtperf/config.c b/bench/wtperf/config.c index a15a3485dde..9eea99eeec4 100644 --- a/bench/wtperf/config.c +++ b/bench/wtperf/config.c @@ -215,6 +215,7 @@ config_threads(WTPERF *wtperf, const char *config, size_t len) return (EINVAL); } workp = &wtperf->workload[wtperf->workload_cnt++]; + workp->table_index = INT32_MAX; while ((ret = scan->next(scan, &k, &v)) == 0) { if (STRING_MATCH("count", k.str, k.len)) { @@ -233,12 +234,28 @@ config_threads(WTPERF *wtperf, const char *config, size_t len) goto err; continue; } + if (STRING_MATCH("pause", k.str, k.len)) { + if ((workp->pause = v.val) < 0) + goto err; + continue; + } if (STRING_MATCH("read", k.str, k.len) || STRING_MATCH("reads", k.str, k.len)) { if ((workp->read = v.val) < 0) goto err; continue; } + if (STRING_MATCH("read_range", k.str, k.len)) { + if ((workp->read_range = v.val) < 0) + goto err; + continue; + } + if (STRING_MATCH("table", k.str, k.len)) { + if (v.val <= 0) + goto err; + workp->table_index = (int32_t)v.val - 1; + continue; + } if (STRING_MATCH("throttle", k.str, k.len)) { workp->throttle = (uint64_t)v.val; continue; @@ -760,16 +777,33 @@ config_sanity(WTPERF *wtperf) opts->value_sz_min = opts->value_sz; } - if (opts->readonly && wtperf->workload != NULL) + if (wtperf->workload != NULL) for (i = 0, workp = wtperf->workload; - i < wtperf->workload_cnt; ++i, ++workp) - if (workp->insert != 0 || workp->update != 0 || - workp->truncate != 0) { + i < wtperf->workload_cnt; ++i, ++workp) { + if (opts->readonly && + (workp->insert 
!= 0 || workp->update != 0 || + workp->truncate != 0)) { fprintf(stderr, "Invalid workload: insert, update or " "truncate specified with readonly\n"); return (EINVAL); } + if (workp->insert != 0 && + workp->table_index != INT32_MAX) { + fprintf(stderr, + "Invalid workload: Cannot insert into " + "specific table only\n"); + return (EINVAL); + } + if (workp->table_index != INT32_MAX && + workp->table_index >= (int32_t)opts->table_count) { + fprintf(stderr, + "Workload table index %" PRId32 + " is larger than table count %" PRId32, + workp->table_index, opts->table_count); + return (EINVAL); + } + } return (0); } diff --git a/bench/wtperf/idle_table_cycle.c b/bench/wtperf/idle_table_cycle.c index 13fa55e86f5..bb44cfbde59 100644 --- a/bench/wtperf/idle_table_cycle.c +++ b/bench/wtperf/idle_table_cycle.c @@ -120,6 +120,7 @@ cycle_idle_tables(void *arg) return (NULL); start = stop; +#if 1 /* * Drop the table. Keep retrying on EBUSY failure - it is an * expected return when checkpoints are happening. @@ -136,6 +137,7 @@ cycle_idle_tables(void *arg) } if (check_timing(wtperf, "drop", start, &stop) != 0) return (NULL); +#endif } return (NULL); diff --git a/bench/wtperf/stress/btree-split-stress.wtperf b/bench/wtperf/stress/btree-split-stress.wtperf index 86bb288fc6d..eb6ca1cfddc 100644 --- a/bench/wtperf/stress/btree-split-stress.wtperf +++ b/bench/wtperf/stress/btree-split-stress.wtperf @@ -6,5 +6,4 @@ run_time=300 reopen_connection=false populate_threads=2 value_sz=256 -read_range=100 -threads=((count=4,inserts=1,throttle=100000),(count=8,reads=1)) +threads=((count=4,inserts=1,throttle=100000),(count=8,reads=1,read_range=100)) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index baa259f8817..044fd38dc06 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -432,19 +432,17 @@ err: wtperf->error = wtperf->stop = true; * search do them. Ensuring the keys we see are always in order. 
*/ static int -do_range_reads(WTPERF *wtperf, WT_CURSOR *cursor) +do_range_reads(WTPERF *wtperf, WT_CURSOR *cursor, int64_t read_range) { - CONFIG_OPTS *opts; - size_t range; uint64_t next_val, prev_val; + int64_t range; char *range_key_buf; char buf[512]; int ret; - opts = wtperf->opts; ret = 0; - if (opts->read_range == 0) + if (read_range == 0) return (0); memset(&buf[0], 0, 512 * sizeof(char)); @@ -454,7 +452,7 @@ do_range_reads(WTPERF *wtperf, WT_CURSOR *cursor) testutil_check(cursor->get_key(cursor, &range_key_buf)); extract_key(range_key_buf, &next_val); - for (range = 0; range < opts->read_range; ++range) { + for (range = 0; range < read_range; ++range) { prev_val = next_val; ret = cursor->next(cursor); /* We are done if we reach the end. */ @@ -475,12 +473,56 @@ do_range_reads(WTPERF *wtperf, WT_CURSOR *cursor) return (0); } +/* pre_load_data -- + * Pull everything into cache before starting the workload phase. + */ +static int +pre_load_data(WTPERF *wtperf) +{ + CONFIG_OPTS *opts; + WT_CONNECTION *conn; + WT_CURSOR *cursor; + WT_SESSION *session; + char *key; + int ret; + size_t i; + + opts = wtperf->opts; + conn = wtperf->conn; + + if ((ret = conn->open_session( + conn, NULL, opts->sess_config, &session)) != 0) { + lprintf(wtperf, ret, 0, "worker: WT_CONNECTION.open_session"); + goto err; + } + for (i = 0; i < opts->table_count; i++) { + if ((ret = session->open_cursor(session, + wtperf->uris[i], NULL, NULL, &cursor)) != 0) { + lprintf(wtperf, ret, 0, + "worker: WT_SESSION.open_cursor: %s", + wtperf->uris[i]); + goto err; + } + while (cursor->next(cursor) == 0) + if ((ret = cursor->get_key(cursor, &key)) != 0) + goto err; + if ((ret = cursor->close(cursor)) != 0) + goto err; + } + if ((ret = session->close(session, NULL)) != 0) + goto err; + if (ret != 0) +err: lprintf(wtperf, ret, 0, "Pre-workload traverse error"); + return (ret); +} + static void * worker(void *arg) { struct timespec start, stop; CONFIG_OPTS *opts; TRACK *trk; + WORKLOAD *workload; 
WTPERF *wtperf; WTPERF_THREAD *thread; WT_CONNECTION *conn; @@ -495,13 +537,14 @@ worker(void *arg) char buf[512]; thread = (WTPERF_THREAD *)arg; + workload = thread->workload; wtperf = thread->wtperf; opts = wtperf->opts; conn = wtperf->conn; cursors = NULL; - log_table_cursor = NULL; /* -Wconditional-initialized */ + cursor = log_table_cursor = NULL; /* -Wconditional-initialized */ ops = 0; - ops_per_txn = thread->workload->ops_per_txn; + ops_per_txn = workload->ops_per_txn; session = NULL; trk = NULL; @@ -510,7 +553,6 @@ worker(void *arg) lprintf(wtperf, ret, 0, "worker: WT_CONNECTION.open_session"); goto err; } - cursors = dcalloc(opts->table_count, sizeof(WT_CURSOR *)); for (i = 0; i < opts->table_count_idle; i++) { snprintf(buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i); if ((ret = session->open_cursor( @@ -525,14 +567,34 @@ worker(void *arg) goto err; } } - for (i = 0; i < opts->table_count; i++) { + if (workload->table_index != INT32_MAX) { if ((ret = session->open_cursor(session, - wtperf->uris[i], NULL, NULL, &cursors[i])) != 0) { + wtperf->uris[workload->table_index], + NULL, NULL, &cursor)) != 0) { lprintf(wtperf, ret, 0, "worker: WT_SESSION.open_cursor: %s", - wtperf->uris[i]); + wtperf->uris[workload->table_index]); + goto err; + } + if ((ret = session->open_cursor(session, + wtperf->uris[workload->table_index], + NULL, "next_random=true", &thread->rand_cursor)) != 0) { + lprintf(wtperf, ret, 0, + "worker: WT_SESSION.open_cursor: random %s", + wtperf->uris[workload->table_index]); goto err; } + } else { + cursors = dcalloc(opts->table_count, sizeof(WT_CURSOR *)); + for (i = 0; i < opts->table_count; i++) { + if ((ret = session->open_cursor(session, + wtperf->uris[i], NULL, NULL, &cursors[i])) != 0) { + lprintf(wtperf, ret, 0, + "worker: WT_SESSION.open_cursor: %s", + wtperf->uris[i]); + goto err; + } + } } if (opts->log_like_table && (ret = session->open_cursor(session, wtperf->log_table_uri, NULL, NULL, &log_table_cursor)) != 0) { @@ -543,19 
+605,19 @@ worker(void *arg) } /* Setup the timer for throttling. */ - if (thread->workload->throttle != 0) + if (workload->throttle != 0) setup_throttle(thread); /* Setup for truncate */ - if (thread->workload->truncate != 0) + if (workload->truncate != 0) if ((ret = setup_truncate(wtperf, thread, session)) != 0) goto err; key_buf = thread->key_buf; value_buf = thread->value_buf; - op = thread->workload->ops; - op_end = op + sizeof(thread->workload->ops); + op = workload->ops; + op_end = op + sizeof(workload->ops); if ((ops_per_txn != 0 || opts->log_like_table) && (ret = session->begin_transaction(session, NULL)) != 0) { @@ -564,6 +626,8 @@ worker(void *arg) } while (!wtperf->stop) { + if (workload->pause != 0) + (void)sleep((unsigned int)workload->pause); /* * Generate the next key and setup operation specific * statistics tracking objects. @@ -603,10 +667,12 @@ worker(void *arg) generate_key(opts, key_buf, next_val); - /* - * Spread the data out around the multiple databases. - */ - cursor = cursors[map_key_to_table(wtperf->opts, next_val)]; + if (workload->table_index == INT32_MAX) + /* + * Spread the data out around the multiple databases. + */ + cursor = cursors[ + map_key_to_table(wtperf->opts, next_val)]; /* * Skip the first time we do an operation, when trk->ops @@ -642,7 +708,8 @@ worker(void *arg) * for several operations, confirming that the * next key is in the correct order. 
*/ - ret = do_range_reads(wtperf, cursor); + ret = do_range_reads(wtperf, + cursor, workload->read_range); } if (ret == 0 || ret == WT_NOTFOUND) @@ -689,7 +756,7 @@ worker(void *arg) */ strncpy(value_buf, value, opts->value_sz_max - 1); - if (thread->workload->update_delta != 0) + if (workload->update_delta != 0) update_value_delta(thread); if (value_buf[0] == 'a') value_buf[0] = 'b'; @@ -806,7 +873,7 @@ op_err: if (ret == WT_ROLLBACK && ops_per_txn != 0) { /* Schedule the next operation */ if (++op == op_end) - op = thread->workload->ops; + op = workload->ops; /* * Decrement throttle ops and check if we should sleep @@ -843,7 +910,7 @@ run_mix_schedule_op(WORKLOAD *workp, int op, int64_t op_cnt) uint8_t *p, *end; /* Jump around the array to roughly spread out the operations. */ - jump = 100 / op_cnt; + jump = (int)(100 / op_cnt); /* * Find a read operation and replace it with another operation. This @@ -884,17 +951,6 @@ run_mix_schedule(WTPERF *wtperf, WORKLOAD *workp) opts = wtperf->opts; - /* Confirm reads, inserts, truncates and updates cannot all be zero. */ - if (workp->insert == 0 && workp->read == 0 && - workp->truncate == 0 && workp->update == 0) { - lprintf(wtperf, EINVAL, 0, "no operations scheduled"); - return (EINVAL); - } - - /* - * Handle truncate first - it's a special case that can't be used in - * a mixed workload. - */ if (workp->truncate != 0) { if (workp->insert != 0 || workp->read != 0 || workp->update != 0) { @@ -906,6 +962,12 @@ run_mix_schedule(WTPERF *wtperf, WORKLOAD *workp) return (0); } + /* Confirm reads, inserts and updates cannot all be zero. 
*/ + if (workp->insert == 0 && workp->read == 0 && workp->update == 0) { + lprintf(wtperf, EINVAL, 0, "no operations scheduled"); + return (EINVAL); + } + /* * Check for a simple case where the thread is only doing insert or * update operations (because the default operation for a @@ -2244,6 +2306,8 @@ start_run(WTPERF *wtperf) opts->checkpoint_threads, checkpoint_worker) != 0) goto err; } + if (opts->pre_load_data && (ret = pre_load_data(wtperf)) != 0) + goto err; /* Execute the workload. */ if ((ret = execute_workload(wtperf)) != 0) goto err; @@ -2827,13 +2891,42 @@ static uint64_t wtperf_rand(WTPERF_THREAD *thread) { CONFIG_OPTS *opts; + WT_CURSOR *rnd_cursor; WTPERF *wtperf; double S1, S2, U; uint64_t rval; + int ret; + char *key_buf; wtperf = thread->wtperf; opts = wtperf->opts; + /* + * If we have a random cursor set up then use it. + */ + if ((rnd_cursor = thread->rand_cursor) != NULL) { + if ((ret = rnd_cursor->next(rnd_cursor))) { + lprintf(wtperf, ret, 0, "worker: rand next failed"); + /* 0 is outside the expected range. */ + return (0); + } + if ((ret = rnd_cursor->get_key(rnd_cursor, &key_buf)) != 0) { + lprintf(wtperf, ret, 0, + "worker: rand next key retrieval"); + return (0); + } + /* + * Resetting the cursor is not fatal. We still return the + * value we retrieved above. We do it so that we don't + * leave a cursor positioned. + */ + if ((ret = rnd_cursor->reset(rnd_cursor)) != 0) + lprintf(wtperf, ret, 0, + "worker: rand cursor reset failed"); + extract_key(key_buf, &rval); + return (rval); + } + /* * Use WiredTiger's random number routine: it's lock-free and fairly * good. diff --git a/bench/wtperf/wtperf.h b/bench/wtperf/wtperf.h index 81d74e134f6..db88d0b0271 100644 --- a/bench/wtperf/wtperf.h +++ b/bench/wtperf/wtperf.h @@ -66,6 +66,9 @@ typedef struct { uint64_t throttle; /* Maximum operations/second */ /* Number of operations per transaction. 
Zero for autocommit */ int64_t ops_per_txn; + int64_t pause; /* Time between scans */ + int64_t read_range; /* Range of reads */ + int32_t table_index; /* Table to focus ops on */ int64_t truncate; /* Truncate ratio */ uint64_t truncate_pct; /* Truncate Percent */ uint64_t truncate_count; /* Truncate Count */ @@ -225,6 +228,7 @@ typedef struct { struct __wtperf_thread { /* Per-thread structure */ WTPERF *wtperf; /* Enclosing configuration */ + WT_CURSOR *rand_cursor; /* Random key cursor */ WT_RAND_STATE rnd; /* Random number generation state */ @@ -241,6 +245,7 @@ struct __wtperf_thread { /* Per-thread structure */ TRACK ckpt; /* Checkpoint operations */ TRACK insert; /* Insert operations */ TRACK read; /* Read operations */ + TRACK scan; /* Scan operations */ TRACK update; /* Update operations */ TRACK truncate; /* Truncate operations */ TRACK truncate_sleep; /* Truncate sleep operations */ diff --git a/bench/wtperf/wtperf_opt.i b/bench/wtperf/wtperf_opt.i index 680eb53a90e..63cef4c28fb 100644 --- a/bench/wtperf/wtperf_opt.i +++ b/bench/wtperf/wtperf_opt.i @@ -145,12 +145,13 @@ DEF_OPT_AS_UINT32(populate_ops_per_txn, 0, "phase, zero for auto-commit") DEF_OPT_AS_UINT32(populate_threads, 1, "number of populate threads, 1 for bulk load") +DEF_OPT_AS_BOOL(pre_load_data, 0, + "Scan all data prior to starting the workload phase to warm the cache") DEF_OPT_AS_UINT32(random_range, 0, "if non zero choose a value from within this range as the key for " "insert operations") DEF_OPT_AS_BOOL(random_value, 0, "generate random content for the value") DEF_OPT_AS_BOOL(range_partition, 0, "partition data by range (vs hash)") -DEF_OPT_AS_UINT32(read_range, 0, "scan a range of keys after each search") DEF_OPT_AS_BOOL(readonly, 0, "reopen the connection between populate and workload phases in readonly " "mode. Requires reopen_connection turned on (default). 
Requires that " @@ -192,9 +193,10 @@ DEF_OPT_AS_STRING(threads, "", "workload configuration: each 'count' " "'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))' " "which would create 2 threads doing nothing but reads and 8 threads " "each doing 50% inserts and 25% reads and updates. Allowed configuration " - "values are 'count', 'throttle', 'update_delta', 'reads', 'inserts', " - "'updates', 'truncate', 'truncate_pct' and 'truncate_count'. There are " - "also behavior modifiers, supported modifiers are 'ops_per_txn'") + "values are 'count', 'throttle', 'update_delta', 'reads', 'read_range', " + "'inserts', 'updates', 'truncate', 'truncate_pct' and 'truncate_count'. " + "There are also behavior modifiers, supported modifiers are " + "'ops_per_txn'") DEF_OPT_AS_CONFIG_STRING(transaction_config, "", "WT_SESSION.begin_transaction configuration string, applied during the " "populate phase when populate_ops_per_txn is nonzero") diff --git a/src/docs/wtperf.dox b/src/docs/wtperf.dox index 83aadf8a776..2eac0fef3f4 100644 --- a/src/docs/wtperf.dox +++ b/src/docs/wtperf.dox @@ -195,14 +195,14 @@ use pareto distribution for random numbers. Zero to disable, otherwise a percen number of operations to group into each transaction in the populate phase, zero for auto-commit @par populate_threads (unsigned int, default=1) number of populate threads, 1 for bulk load +@par pre_load_data (boolean, default=false) +Scan all data prior to starting the workload phase to warm the cache @par random_range (unsigned int, default=0) if non zero choose a value from within this range as the key for insert operations @par random_value (boolean, default=false) generate random content for the value @par range_partition (boolean, default=false) partition data by range (vs hash) -@par read_range (unsigned int, default=0) -scan a range of keys after each search @par readonly (boolean, default=false) reopen the connection between populate and workload phases in readonly mode. 
Requires reopen_connection turned on (default). Requires that read be the only workload specified @par reopen_connection (boolean, default=true) @@ -228,7 +228,7 @@ number of tables to run operations over. Keys are divided evenly over the table @par table_count_idle (unsigned int, default=0) number of tables to create, that won't be populated. Default 0. @par threads (string, default="") -workload configuration: each 'count' entry is the total number of threads, and the 'insert', 'read' and 'update' entries are the ratios of insert, read and update operations done by each worker thread; If a throttle value is provided each thread will do a maximum of that number of operations per second; multiple workload configurations may be specified per threads configuration; for example, a more complex threads configuration might be 'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))' which would create 2 threads doing nothing but reads and 8 threads each doing 50% inserts and 25% reads and updates. Allowed configuration values are 'count', 'throttle', 'update_delta', 'reads', 'inserts', 'updates', 'truncate', 'truncate_pct' and 'truncate_count'. There are also behavior modifiers, supported modifiers are 'ops_per_txn' +workload configuration: each 'count' entry is the total number of threads, and the 'insert', 'read' and 'update' entries are the ratios of insert, read and update operations done by each worker thread; If a throttle value is provided each thread will do a maximum of that number of operations per second; multiple workload configurations may be specified per threads configuration; for example, a more complex threads configuration might be 'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))' which would create 2 threads doing nothing but reads and 8 threads each doing 50% inserts and 25% reads and updates. 
Allowed configuration values are 'count', 'throttle', 'update_delta', 'reads', 'read_range', 'inserts', 'updates', 'truncate', 'truncate_pct' and 'truncate_count'. There are also behavior modifiers, supported modifiers are 'ops_per_txn' @par transaction_config (string, default="") WT_SESSION.begin_transaction configuration string, applied during the populate phase when populate_ops_per_txn is nonzero @par table_name (string, default="test") -- cgit v1.2.1 From 17ec908453f8dae29d18cd8ba172360ef0473c8f Mon Sep 17 00:00:00 2001 From: sueloverso Date: Thu, 2 Feb 2017 14:01:31 -0500 Subject: WT-3157 Fix checkpoint error path (#3274) --- src/txn/txn_ckpt.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 7b33b0c7788..90804db3240 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -181,7 +181,7 @@ __checkpoint_apply(WT_SESSION_IMPL *session, const char *cfg[], int (*op)(WT_SESSION_IMPL *, const char *[])) { WT_DECL_RET; - u_int i; + u_int i, j; /* If we have already locked the handles, apply the operation. */ for (i = 0; i < session->ckpt_handle_next; ++i) { @@ -189,10 +189,22 @@ __checkpoint_apply(WT_SESSION_IMPL *session, const char *cfg[], continue; WT_WITH_DHANDLE(session, session->ckpt_handle[i], ret = (*op)(session, cfg)); - WT_RET(ret); + WT_ERR(ret); } - return (0); +err: + /* + * If we have an error somewhere in processing the handles, then + * we need to mark earlier trees dirty. + */ + if (ret != 0) + for (j = 0; j < i; ++j) { + if (session->ckpt_handle[j] == NULL) + continue; + WT_WITH_DHANDLE(session, session->ckpt_handle[j], + S2BT(session)->modified = true); + } + return (ret); } /* @@ -824,7 +836,7 @@ err: /* * overwritten the checkpoint, so what ends up on disk is not * consistent. 
*/ - if (ret != 0 && !conn->modified) + if (ret != 0) conn->modified = true; session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED; @@ -1340,7 +1352,6 @@ __checkpoint_tree( WT_DATA_HANDLE *dhandle; WT_DECL_RET; WT_LSN ckptlsn; - int was_modified; bool fake_ckpt; WT_UNUSED(cfg); @@ -1351,7 +1362,6 @@ __checkpoint_tree( conn = S2C(session); dhandle = session->dhandle; fake_ckpt = false; - was_modified = btree->modified; /* * Set the checkpoint LSN to the maximum LSN so that if logging is @@ -1482,10 +1492,9 @@ err: /* * If the checkpoint didn't complete successfully, make sure the * tree is marked dirty. */ - if (ret != 0 && !btree->modified && was_modified) { + if (ret != 0) { btree->modified = true; - if (!S2C(session)->modified) - S2C(session)->modified = true; + S2C(session)->modified = true; } __wt_meta_ckptlist_free(session, ckptbase); -- cgit v1.2.1 From 009959863f181a07d6c5bb73bcd0e4f1fded7b78 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Fri, 3 Feb 2017 12:57:31 +1100 Subject: WT-3150 Fix: don't spin forever during eviction interrupts. (#3276) --- src/evict/evict_lru.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 3cb513fd87b..a071730d4bd 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -319,7 +319,9 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) * otherwise be busy so don't go to sleep. */ if (was_intr) { - while (cache->pass_intr != 0) + while (cache->pass_intr != 0 && + F_ISSET(conn, WT_CONN_EVICTION_RUN) && + F_ISSET(thread, WT_THREAD_RUN)) __wt_yield(); continue; } -- cgit v1.2.1 From 6df1a46875156202f560d6d173ba0be7afe8ca98 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Fri, 3 Feb 2017 15:45:32 +1100 Subject: WT-3148 Improve efficiency of eviction with many small trees. 
(#3264) --- src/evict/evict_lru.c | 95 ++++++++++++++++++++++++++++++++++++++------------- src/include/btree.i | 22 ++++++++++++ src/include/extern.h | 1 + src/support/rand.c | 12 +++++++ 4 files changed, 106 insertions(+), 24 deletions(-) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index a071730d4bd..2b7b46e19fa 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1559,6 +1559,19 @@ __evict_walk_file(WT_SESSION_IMPL *session, start = queue->evict_queue + *slotp; remaining_slots = max_entries - *slotp; total_slots = max_entries - queue->evict_entries; + btree_inuse = cache_inuse = 0; + target_pages_clean = target_pages_dirty = 0; + + /* + * The number of times we should fill the queue by the end of + * considering all trees. + */ +#define QUEUE_FILLS_PER_PASS 10 + + /* + * The minimum number of pages we should consider per tree. + */ +#define MIN_PAGES_PER_TREE 10 /* * The target number of pages for this tree is proportional to the @@ -1567,13 +1580,12 @@ __evict_walk_file(WT_SESSION_IMPL *session, * cache (and only have to walk it once). */ if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN)) { - btree_inuse = __wt_btree_bytes_inuse(session); + btree_inuse = __wt_btree_bytes_evictable(session); cache_inuse = __wt_cache_bytes_inuse(cache); bytes_per_slot = 1 + cache_inuse / total_slots; target_pages_clean = (uint32_t)( (btree_inuse + bytes_per_slot / 2) / bytes_per_slot); - } else - target_pages_clean = 0; + } if (F_ISSET(cache, WT_CACHE_EVICT_DIRTY)) { btree_inuse = __wt_btree_dirty_leaf_inuse(session); @@ -1581,35 +1593,58 @@ __evict_walk_file(WT_SESSION_IMPL *session, bytes_per_slot = 1 + cache_inuse / total_slots; target_pages_dirty = (uint32_t)( (btree_inuse + bytes_per_slot / 2) / bytes_per_slot); - } else - target_pages_dirty = 0; + } - target_pages = WT_MAX(target_pages_clean, target_pages_dirty); + /* + * Weight the number of target pages by the number of times we want to + * fill the cache per pass through all the trees. 
Note that we don't + * build this into the calculation above because we don't want to favor + * small trees, so round to a whole number of slots (zero for small + * trees) before multiplying. + */ + target_pages = WT_MAX(target_pages_clean, target_pages_dirty) * + QUEUE_FILLS_PER_PASS; + /* + * Randomly walk trees with a small fraction of the cache in case there + * are so many trees that none of them use enough of the cache to be + * allocated slots. + * + * The chance of walking a tree is equal to the chance that a random + * byte in cache belongs to the tree, weighted by how many times we + * want to fill queues during a pass through all the trees in cache. + */ if (target_pages == 0) { - /* - * Randomly walk trees with a tiny fraction of the cache in - * case there are so many trees that none of them use enough of - * the cache to be allocated slots. Walk small trees 1% of the - * time. - */ - if (__wt_random(&session->rnd) > UINT32_MAX / 100) + if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN)) { + btree_inuse = __wt_btree_bytes_evictable(session); + cache_inuse = __wt_cache_bytes_inuse(cache); + } else { + btree_inuse = __wt_btree_dirty_leaf_inuse(session); + cache_inuse = __wt_cache_dirty_leaf_inuse(cache); + } + if (btree_inuse == 0 || cache_inuse == 0) + return (0); + if (__wt_random64(&session->rnd) % cache_inuse > + btree_inuse * QUEUE_FILLS_PER_PASS) return (0); - target_pages = 10; } + /* + * There is some cost associated with walking a tree. If we're going + * to visit this tree, always look for a minimum number of pages. + */ + if (target_pages < MIN_PAGES_PER_TREE) + target_pages = MIN_PAGES_PER_TREE; + + /* + * If the tree is dead or we're near the end of the queue, fill the + * remaining slots. 
+ */ if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || target_pages > remaining_slots) target_pages = remaining_slots; end = start + target_pages; - walk_flags = - WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT; - - /* Randomize the walk direction. */ - if (btree->evict_walk_reverse) - FLD_SET(walk_flags, WT_READ_PREV); - /* * Examine at least a reasonable number of pages before deciding * whether to give up. When we are only looking for dirty pages, @@ -1620,6 +1655,13 @@ __evict_walk_file(WT_SESSION_IMPL *session, !F_ISSET(cache, WT_CACHE_EVICT_CLEAN)) min_pages *= 10; + walk_flags = + WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT; + + /* Randomize the walk direction. */ + if (btree->evict_walk_reverse) + FLD_SET(walk_flags, WT_READ_PREV); + /* * Get some more eviction candidate pages. * @@ -1752,12 +1794,17 @@ fast: /* If the page can't be evicted, give up. */ session, cache_eviction_pages_queued, (u_int)(evict - start)); /* - * If we didn't find any candidates in the file, reverse the direction - * of the walk and skip it next time. + * If gave up the walk, reverse the direction of the walk and skip it + * next time. */ if (give_up) btree->evict_walk_reverse = !btree->evict_walk_reverse; - if (pages_queued == 0 && !urgent_queued) + + /* + * If we couldn't find the number of pages we were looking for, skip + * the tree next time. + */ + if (pages_queued < target_pages / 2 && !urgent_queued) btree->evict_walk_period = WT_MIN( WT_MAX(1, 2 * btree->evict_walk_period), 100); else if (pages_queued == target_pages) diff --git a/src/include/btree.i b/src/include/btree.i index 09fa8df8c56..1e971fa81c9 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -70,6 +70,28 @@ __wt_btree_bytes_inuse(WT_SESSION_IMPL *session) return (__wt_cache_bytes_plus_overhead(cache, btree->bytes_inmem)); } +/* + * __wt_btree_bytes_evictable -- + * Return the number of bytes that can be evicted (i.e. 
bytes apart from + * the pinned root page). + */ +static inline uint64_t +__wt_btree_bytes_evictable(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + WT_CACHE *cache; + uint64_t bytes_inmem, bytes_root; + + btree = S2BT(session); + cache = S2C(session)->cache; + + bytes_inmem = btree->bytes_inmem; + bytes_root = btree->root.page->memory_footprint; + + return (bytes_inmem <= bytes_root ? 0 : + __wt_cache_bytes_plus_overhead(cache, bytes_inmem - bytes_root)); +} + /* * __wt_btree_dirty_inuse -- * Return the number of dirty bytes in use. diff --git a/src/include/extern.h b/src/include/extern.h index d7d58c58048..863d2a02861 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -688,6 +688,7 @@ extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2) WT_GCC_FUNC_DECL_ATTRIBUT extern void __wt_random_init(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_random_init_seed( WT_SESSION_IMPL *session, WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern uint32_t __wt_random(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern uint64_t __wt_random64(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/support/rand.c b/src/support/rand.c index a5b229b9abc..4fae43edc8e 100644 --- a/src/support/rand.c +++ b/src/support/rand.c @@ -120,3 +120,15 @@ __wt_random(WT_RAND_STATE volatile * rnd_state) return ((z << 16) + (w & 65535)); } + +/* + * __wt_random64 -- + * Return a 64-bit pseudo-random number. + */ +uint64_t +__wt_random64(WT_RAND_STATE volatile * rnd_state) + WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) +{ + return (((uint64_t)__wt_random(rnd_state) << 32) + + __wt_random(rnd_state)); +} -- cgit v1.2.1 From de3424c0bca2d7660acaff17383e05849d164a16 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Fri, 3 Feb 2017 17:00:41 +1100 Subject: WT-3148 Check that we have a root page when calculating evictable size. --- src/include/btree.i | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/include/btree.i b/src/include/btree.i index 1e971fa81c9..378d93dd2ee 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -80,13 +80,15 @@ __wt_btree_bytes_evictable(WT_SESSION_IMPL *session) { WT_BTREE *btree; WT_CACHE *cache; + WT_PAGE *root_page; uint64_t bytes_inmem, bytes_root; btree = S2BT(session); cache = S2C(session)->cache; + root_page = btree->root.page; bytes_inmem = btree->bytes_inmem; - bytes_root = btree->root.page->memory_footprint; + bytes_root = root_page == NULL ? 0 : root_page->memory_footprint; return (bytes_inmem <= bytes_root ? 0 : __wt_cache_bytes_plus_overhead(cache, bytes_inmem - bytes_root)); -- cgit v1.2.1 From b2173f8f063b1528dcd086f00ca8cf072f0445d0 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sun, 5 Feb 2017 19:55:36 -0500 Subject: WT-3111 util_create() doesnt free memory assigned to "uri" (#3279) Always print an error message if a WT_SESSION method fails, we don't know if the WiredTiger library printed out a message or not. 
Free memory allocated by util_uri() in some cases where we either didn't have the necessary free call, or simply returned without freeing memory. Try and be more consistent with error messages, use the leading call as the first string, and any arguments to that call as the second. Replace some of the places we're writing to stderr explicitly with the utility error handlers. Initialize the return variable from util_uri() in all cases. Change error messages that referenced WT_SESSION.open to reference WT_SESSION.open_cursor. --- bench/wtperf/wtperf.c | 2 +- bench/wtperf/wtperf.h | 1 - src/evict/evict_lru.c | 5 ++--- src/utilities/util.h | 2 +- src/utilities/util_alter.c | 9 ++++++--- src/utilities/util_compact.c | 14 +++----------- src/utilities/util_create.c | 12 +++++++----- src/utilities/util_drop.c | 10 ++++++---- src/utilities/util_dump.c | 26 +++++++++++++------------- src/utilities/util_list.c | 21 ++++++++++----------- src/utilities/util_load.c | 2 +- src/utilities/util_load_json.c | 2 +- src/utilities/util_loadtext.c | 13 +++++++++---- src/utilities/util_main.c | 4 ++-- src/utilities/util_printlog.c | 14 +++----------- src/utilities/util_read.c | 19 +++++++++++++------ src/utilities/util_rebalance.c | 30 +++++++++++++----------------- src/utilities/util_rename.c | 15 ++++----------- src/utilities/util_salvage.c | 30 +++++++++++++----------------- src/utilities/util_stat.c | 6 +++--- src/utilities/util_truncate.c | 11 ++++++----- src/utilities/util_upgrade.c | 30 +++++++++++++----------------- src/utilities/util_verify.c | 34 +++++++++++++++------------------- src/utilities/util_write.c | 20 +++++++++++++------- 24 files changed, 158 insertions(+), 174 deletions(-) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 044fd38dc06..7f5e5ad3373 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -2905,7 +2905,7 @@ wtperf_rand(WTPERF_THREAD *thread) * If we have a random cursor set up then use it. 
*/ if ((rnd_cursor = thread->rand_cursor) != NULL) { - if ((ret = rnd_cursor->next(rnd_cursor))) { + if ((ret = rnd_cursor->next(rnd_cursor)) != 0) { lprintf(wtperf, ret, 0, "worker: rand next failed"); /* 0 is outside the expected range. */ return (0); diff --git a/bench/wtperf/wtperf.h b/bench/wtperf/wtperf.h index db88d0b0271..3efb8ab700e 100644 --- a/bench/wtperf/wtperf.h +++ b/bench/wtperf/wtperf.h @@ -245,7 +245,6 @@ struct __wtperf_thread { /* Per-thread structure */ TRACK ckpt; /* Checkpoint operations */ TRACK insert; /* Insert operations */ TRACK read; /* Read operations */ - TRACK scan; /* Scan operations */ TRACK update; /* Update operations */ TRACK truncate; /* Truncate operations */ TRACK truncate_sleep; /* Truncate sleep operations */ diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 2b7b46e19fa..db39a5acdee 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -198,8 +198,7 @@ __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref) } __wt_spin_unlock(session, &cache->evict_queues[q].evict_lock); } - WT_ASSERT(session, - !F_ISSET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU)); + WT_ASSERT(session, !F_ISSET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU)); __wt_spin_unlock(session, &cache->evict_queue_lock); } @@ -1781,7 +1780,7 @@ fast: /* If the page can't be evicted, give up. 
*/ ++pages_queued; if (WT_PAGE_IS_INTERNAL(page)) - ++internal_pages; + ++internal_pages; __wt_verbose(session, WT_VERB_EVICTSERVER, "select: %p, size %" WT_SIZET_FMT, diff --git a/src/utilities/util.h b/src/utilities/util.h index cf12d7d4aa6..93a96d44219 100644 --- a/src/utilities/util.h +++ b/src/utilities/util.h @@ -40,7 +40,6 @@ int util_flush(WT_SESSION *, const char *); int util_list(WT_SESSION *, int, char *[]); int util_load(WT_SESSION *, int, char *[]); int util_loadtext(WT_SESSION *, int, char *[]); -char *util_name(WT_SESSION *, const char *, const char *); int util_printlog(WT_SESSION *, int, char *[]); int util_read(WT_SESSION *, int, char *[]); int util_read_line(WT_SESSION *, ULINE *, bool, bool *); @@ -51,5 +50,6 @@ int util_stat(WT_SESSION *, int, char *[]); int util_str2recno(WT_SESSION *, const char *p, uint64_t *recnop); int util_truncate(WT_SESSION *, int, char *[]); int util_upgrade(WT_SESSION *, int, char *[]); +char *util_uri(WT_SESSION *, const char *, const char *); int util_verify(WT_SESSION *, int, char *[]); int util_write(WT_SESSION *, int, char *[]); diff --git a/src/utilities/util_alter.c b/src/utilities/util_alter.c index d228c15cd48..ef01a1ed826 100644 --- a/src/utilities/util_alter.c +++ b/src/utilities/util_alter.c @@ -34,9 +34,12 @@ util_alter(WT_SESSION *session, int argc, char *argv[]) for (configp = argv; configp != NULL && *configp != NULL; configp += 2) if ((ret = session->alter( - session, configp[0], configp[1])) != 0) - break; - return (ret); + session, configp[0], configp[1])) != 0) { + (void)util_err(session, ret, + "session.alter: %s, %s", configp[0], configp[1]); + return (1); + } + return (0); } static int diff --git a/src/utilities/util_compact.c b/src/utilities/util_compact.c index c114eb207fa..e469b4dce6e 100644 --- a/src/utilities/util_compact.c +++ b/src/utilities/util_compact.c @@ -30,21 +30,13 @@ util_compact(WT_SESSION *session, int argc, char *argv[]) /* The remaining argument is the table name. 
*/ if (argc != 1) return (usage()); - if ((uri = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); - if ((ret = session->compact(session, uri, NULL)) != 0) { - fprintf(stderr, "%s: compact(%s): %s\n", - progname, uri, session->strerror(session, ret)); - goto err; - } - - if (0) { -err: ret = 1; - } + if ((ret = session->compact(session, uri, NULL)) != 0) + (void)util_err(session, ret, "session.compact: %s", uri); free(uri); - return (ret); } diff --git a/src/utilities/util_create.c b/src/utilities/util_create.c index 4e609736f2d..7c22a67792b 100644 --- a/src/utilities/util_create.c +++ b/src/utilities/util_create.c @@ -15,9 +15,9 @@ util_create(WT_SESSION *session, int argc, char *argv[]) { WT_DECL_RET; int ch; - const char *config, *uri; + char *config, *uri; - config = NULL; + config = uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "c:")) != EOF) switch (ch) { case 'c': /* command-line configuration */ @@ -35,12 +35,14 @@ util_create(WT_SESSION *session, int argc, char *argv[]) if (argc != 1) return (usage()); - if ((uri = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); if ((ret = session->create(session, uri, config)) != 0) - return (util_err(session, ret, "%s: session.create", uri)); - return (0); + (void)util_err(session, ret, "session.create: %s", uri); + + free(uri); + return (ret); } static int diff --git a/src/utilities/util_drop.c b/src/utilities/util_drop.c index ba41445dfb6..456005d445d 100644 --- a/src/utilities/util_drop.c +++ b/src/utilities/util_drop.c @@ -15,8 +15,9 @@ util_drop(WT_SESSION *session, int argc, char *argv[]) { WT_DECL_RET; int ch; - char *name; + char *uri; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF) switch (ch) { case '?': @@ -30,12 +31,13 @@ util_drop(WT_SESSION *session, int argc, char *argv[]) /* The remaining argument is the uri. 
*/ if (argc != 1) return (usage()); - if ((name = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); - ret = session->drop(session, name, "force"); + if ((ret = session->drop(session, uri, "force")) != 0) + (void)util_err(session, ret, "session.drop: %s", uri); - free(name); + free(uri); return (ret); } diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c index 3f8b4a49dfe..cded40a8b45 100644 --- a/src/utilities/util_dump.c +++ b/src/utilities/util_dump.c @@ -37,10 +37,10 @@ util_dump(WT_SESSION *session, int argc, char *argv[]) size_t len; int ch, i; bool hex, json, reverse; - char *checkpoint, *config, *name, *p, *simplename; + char *checkpoint, *config, *p, *simpleuri, *uri; hex = json = reverse = false; - checkpoint = config = name = simplename = NULL; + checkpoint = config = simpleuri = uri = NULL; cursor = NULL; while ((ch = __wt_getopt(progname, argc, argv, "c:f:jrx")) != EOF) switch (ch) { @@ -89,11 +89,11 @@ util_dump(WT_SESSION *session, int argc, char *argv[]) if (json && i > 0) if (dump_json_separator(session) != 0) goto err; - free(name); - free(simplename); - name = simplename = NULL; + free(uri); + free(simpleuri); + uri = simpleuri = NULL; - if ((name = util_name(session, argv[i], "table")) == NULL) + if ((uri = util_uri(session, argv[i], "table")) == NULL) goto err; len = @@ -113,19 +113,19 @@ util_dump(WT_SESSION *session, int argc, char *argv[]) (void)strcat(config, json ? "dump=json" : (hex ? 
"dump=hex" : "dump=print")); if ((ret = session->open_cursor( - session, name, NULL, config, &cursor)) != 0) { + session, uri, NULL, config, &cursor)) != 0) { fprintf(stderr, "%s: cursor open(%s) failed: %s\n", - progname, name, session->strerror(session, ret)); + progname, uri, session->strerror(session, ret)); goto err; } - if ((simplename = strdup(name)) == NULL) { + if ((simpleuri = strdup(uri)) == NULL) { (void)util_err(session, errno, NULL); goto err; } - if ((p = strchr(simplename, '(')) != NULL) + if ((p = strchr(simpleuri, '(')) != NULL) *p = '\0'; - if (dump_config(session, simplename, cursor, hex, json) != 0) + if (dump_config(session, simpleuri, cursor, hex, json) != 0) goto err; if (dump_record(cursor, reverse, json) != 0) @@ -148,8 +148,8 @@ err: ret = 1; } free(config); - free(name); - free(simplename); + free(uri); + free(simpleuri); if (cursor != NULL && (ret = cursor->close(cursor)) != 0) { (void)util_err(session, ret, NULL); ret = 1; diff --git a/src/utilities/util_list.c b/src/utilities/util_list.c index e91dbfce05b..f19ba4d1f97 100644 --- a/src/utilities/util_list.c +++ b/src/utilities/util_list.c @@ -19,10 +19,10 @@ util_list(WT_SESSION *session, int argc, char *argv[]) WT_DECL_RET; int ch; bool cflag, vflag; - char *name; + char *uri; cflag = vflag = false; - name = NULL; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "cv")) != EOF) switch (ch) { case 'c': @@ -42,17 +42,16 @@ util_list(WT_SESSION *session, int argc, char *argv[]) case 0: break; case 1: - if ((name = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); break; default: return (usage()); } - ret = list_print(session, name, cflag, vflag); - - free(name); + ret = list_print(session, uri, cflag, vflag); + free(uri); return (ret); } @@ -99,7 +98,7 @@ list_get_allocsize(WT_SESSION *session, const char *key, size_t *allocsize) * List the high-level objects in the database. 
*/ static int -list_print(WT_SESSION *session, const char *name, bool cflag, bool vflag) +list_print(WT_SESSION *session, const char *uri, bool cflag, bool vflag) { WT_CURSOR *cursor; WT_DECL_RET; @@ -120,7 +119,7 @@ list_print(WT_SESSION *session, const char *name, bool cflag, bool vflag) ret, "%s: WT_SESSION.open_cursor", WT_METADATA_URI)); } - found = name == NULL; + found = uri == NULL; while ((ret = cursor->next(cursor)) == 0) { /* Get the key. */ if ((ret = cursor->get_key(cursor, &key)) != 0) @@ -129,8 +128,8 @@ list_print(WT_SESSION *session, const char *name, bool cflag, bool vflag) /* * If a name is specified, only show objects that match. */ - if (name != NULL) { - if (!WT_PREFIX_MATCH(key, name)) + if (uri != NULL) { + if (!WT_PREFIX_MATCH(key, uri)) continue; found = true; } @@ -161,7 +160,7 @@ list_print(WT_SESSION *session, const char *name, bool cflag, bool vflag) if (ret != WT_NOTFOUND) return (util_cerr(cursor, "next", ret)); if (!found) { - fprintf(stderr, "%s: %s: not found\n", progname, name); + fprintf(stderr, "%s: %s: not found\n", progname, uri); return (1); } diff --git a/src/utilities/util_load.c b/src/utilities/util_load.c index ac18df80851..ca77643eb49 100644 --- a/src/utilities/util_load.c +++ b/src/utilities/util_load.c @@ -126,7 +126,7 @@ load_dump(WT_SESSION *session) append ? ",append" : "", no_overwrite ? ",overwrite=false" : ""); if ((ret = session->open_cursor( session, uri, NULL, config, &cursor)) != 0) { - ret = util_err(session, ret, "%s: session.open", uri); + ret = util_err(session, ret, "%s: session.open_cursor", uri); goto err; } diff --git a/src/utilities/util_load_json.c b/src/utilities/util_load_json.c index 020a4ed9ba9..1189d49a483 100644 --- a/src/utilities/util_load_json.c +++ b/src/utilities/util_load_json.c @@ -242,7 +242,7 @@ json_data(WT_SESSION *session, LF_ISSET(LOAD_JSON_NO_OVERWRITE) ? 
",overwrite=false" : ""); if ((ret = session->open_cursor( session, uri, NULL, config, &cursor)) != 0) { - ret = util_err(session, ret, "%s: session.open", uri); + ret = util_err(session, ret, "%s: session.open_cursor", uri); goto err; } keyformat = cursor->key_format; diff --git a/src/utilities/util_loadtext.c b/src/utilities/util_loadtext.c index f9c5b6e9a1f..7602d43f8c9 100644 --- a/src/utilities/util_loadtext.c +++ b/src/utilities/util_loadtext.c @@ -15,9 +15,11 @@ static int usage(void); int util_loadtext(WT_SESSION *session, int argc, char *argv[]) { + WT_DECL_RET; int ch; - const char *uri; + char *uri; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "f:")) != EOF) switch (ch) { case 'f': /* input file */ @@ -35,10 +37,13 @@ util_loadtext(WT_SESSION *session, int argc, char *argv[]) /* The remaining argument is the uri. */ if (argc != 1) return (usage()); - if ((uri = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); - return (text(session, uri)); + ret = text(session, uri); + + free(uri); + return (ret); } /* @@ -61,7 +66,7 @@ text(WT_SESSION *session, const char *uri) */ if ((ret = session->open_cursor( session, uri, NULL, "append,overwrite", &cursor)) != 0) - return (util_err(session, ret, "%s: session.open", uri)); + return (util_err(session, ret, "%s: session.open_cursor", uri)); /* * We're about to load strings, make sure the formats match. diff --git a/src/utilities/util_main.c b/src/utilities/util_main.c index 001a66d6d9e..7157f0d90fe 100644 --- a/src/utilities/util_main.c +++ b/src/utilities/util_main.c @@ -285,11 +285,11 @@ usage(void) } /* - * util_name -- + * util_uri -- * Build a name. 
*/ char * -util_name(WT_SESSION *session, const char *s, const char *type) +util_uri(WT_SESSION *session, const char *s, const char *type) { size_t len; char *name; diff --git a/src/utilities/util_printlog.c b/src/utilities/util_printlog.c index e7fa2134934..5f3ed43905b 100644 --- a/src/utilities/util_printlog.c +++ b/src/utilities/util_printlog.c @@ -14,8 +14,8 @@ int util_printlog(WT_SESSION *session, int argc, char *argv[]) { WT_DECL_RET; - int ch; uint32_t flags; + int ch; flags = 0; while ((ch = __wt_getopt(progname, argc, argv, "f:x")) != EOF) @@ -41,17 +41,9 @@ util_printlog(WT_SESSION *session, int argc, char *argv[]) if (argc != 0) return (usage()); - ret = __wt_txn_printlog(session, flags); - - if (ret != 0) { - fprintf(stderr, "%s: printlog failed: %s\n", - progname, session->strerror(session, ret)); - goto err; - } + if ((ret = __wt_txn_printlog(session, flags)) != 0) + (void)util_err(session, ret, "printlog"); - if (0) { -err: ret = 1; - } return (ret); } diff --git a/src/utilities/util_read.c b/src/utilities/util_read.c index 2e766377aa9..393949b6a1c 100644 --- a/src/utilities/util_read.c +++ b/src/utilities/util_read.c @@ -18,8 +18,9 @@ util_read(WT_SESSION *session, int argc, char *argv[]) uint64_t recno; int ch; bool rkey, rval; - const char *uri, *value; + char *uri, *value; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF) switch (ch) { case '?': @@ -32,13 +33,19 @@ util_read(WT_SESSION *session, int argc, char *argv[]) /* The remaining arguments are a uri followed by a list of keys. */ if (argc < 2) return (usage()); - if ((uri = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); - /* Open the object. */ - if ((ret = session->open_cursor( - session, uri, NULL, NULL, &cursor)) != 0) - return (util_err(session, ret, "%s: session.open", uri)); + /* + * Open the object; free allocated memory immediately to simplify + * future error handling. 
+ */ + if ((ret = + session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) + (void)util_err(session, ret, "%s: session.open_cursor", uri); + free(uri); + if (ret != 0) + return (ret); /* * A simple search only makes sense if the key format is a string or a diff --git a/src/utilities/util_rebalance.c b/src/utilities/util_rebalance.c index 45f161487e5..c188ea17d22 100644 --- a/src/utilities/util_rebalance.c +++ b/src/utilities/util_rebalance.c @@ -15,9 +15,9 @@ util_rebalance(WT_SESSION *session, int argc, char *argv[]) { WT_DECL_RET; int ch; - char *name; + char *uri; - name = NULL; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF) switch (ch) { case '?': @@ -30,25 +30,21 @@ util_rebalance(WT_SESSION *session, int argc, char *argv[]) /* The remaining argument is the table name. */ if (argc != 1) return (usage()); - if ((name = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); - if ((ret = session->rebalance(session, name, NULL)) != 0) { - fprintf(stderr, "%s: rebalance(%s): %s\n", - progname, name, session->strerror(session, ret)); - goto err; + if ((ret = session->rebalance(session, uri, NULL)) != 0) + (void)util_err(session, ret, "session.rebalance: %s", uri); + else { + /* + * Verbose configures a progress counter, move to the next + * line. + */ + if (verbose) + printf("\n"); } - /* Verbose configures a progress counter, move to the next line. */ - if (verbose) - printf("\n"); - - if (0) { -err: ret = 1; - } - - free(name); - + free(uri); return (ret); } diff --git a/src/utilities/util_rename.c b/src/utilities/util_rename.c index aee299c6e63..bb2d40cd103 100644 --- a/src/utilities/util_rename.c +++ b/src/utilities/util_rename.c @@ -30,22 +30,15 @@ util_rename(WT_SESSION *session, int argc, char *argv[]) /* The remaining arguments are the object uri and new name. 
*/ if (argc != 2) return (usage()); - if ((uri = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); newuri = argv[1]; - if ((ret = session->rename(session, uri, newuri, NULL)) != 0) { - fprintf(stderr, "%s: rename %s to %s: %s\n", - progname, uri, newuri, session->strerror(session, ret)); - goto err; - } - - if (0) { -err: ret = 1; - } + if ((ret = session->rename(session, uri, newuri, NULL)) != 0) + (void)util_err( + session, ret, "session.rename: %s, %s", uri, newuri); free(uri); - return (ret); } diff --git a/src/utilities/util_salvage.c b/src/utilities/util_salvage.c index 679d1074457..6cc2278b846 100644 --- a/src/utilities/util_salvage.c +++ b/src/utilities/util_salvage.c @@ -16,10 +16,10 @@ util_salvage(WT_SESSION *session, int argc, char *argv[]) WT_DECL_RET; int ch; const char *force; - char *name; + char *uri; force = NULL; - name = NULL; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "F")) != EOF) switch (ch) { case 'F': @@ -35,25 +35,21 @@ util_salvage(WT_SESSION *session, int argc, char *argv[]) /* The remaining argument is the file name. */ if (argc != 1) return (usage()); - if ((name = util_name(session, *argv, "file")) == NULL) + if ((uri = util_uri(session, *argv, "file")) == NULL) return (1); - if ((ret = session->salvage(session, name, force)) != 0) { - fprintf(stderr, "%s: salvage(%s): %s\n", - progname, name, session->strerror(session, ret)); - goto err; + if ((ret = session->salvage(session, uri, force)) != 0) + (void)util_err(session, ret, "session.salvage: %s", uri); + else { + /* + * Verbose configures a progress counter, move to the next + * line. + */ + if (verbose) + printf("\n"); } - /* Verbose configures a progress counter, move to the next line. 
*/ - if (verbose) - printf("\n"); - - if (0) { -err: ret = 1; - } - - free(name); - + free(uri); return (ret); } diff --git a/src/utilities/util_stat.c b/src/utilities/util_stat.c index 4376f559ceb..1b75d9ea8bf 100644 --- a/src/utilities/util_stat.c +++ b/src/utilities/util_stat.c @@ -55,7 +55,7 @@ util_stat(WT_SESSION *session, int argc, char *argv[]) objname = (char *)""; break; case 1: - if ((objname = util_name(session, *argv, "table")) == NULL) + if ((objname = util_uri(session, *argv, "table")) == NULL) return (1); objname_free = true; break; @@ -82,8 +82,8 @@ util_stat(WT_SESSION *session, int argc, char *argv[]) (ret = cursor->next(cursor)) == 0 && (ret = cursor->get_value(cursor, &desc, &pval, NULL)) == 0) if (printf("%s=%s\n", desc, pval) < 0) { - ret = errno; - break; + (void)util_err(session, errno, "printf"); + goto err; } if (ret == WT_NOTFOUND) ret = 0; diff --git a/src/utilities/util_truncate.c b/src/utilities/util_truncate.c index 9325c0d7e84..35de02345c8 100644 --- a/src/utilities/util_truncate.c +++ b/src/utilities/util_truncate.c @@ -15,8 +15,9 @@ util_truncate(WT_SESSION *session, int argc, char *argv[]) { WT_DECL_RET; int ch; - char *name; + char *uri; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF) switch (ch) { case '?': @@ -30,13 +31,13 @@ util_truncate(WT_SESSION *session, int argc, char *argv[]) /* The remaining argument is the uri. 
*/ if (argc != 1) return (usage()); - if ((name = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); - if ((ret = session->truncate(session, name, NULL, NULL, NULL)) != 0) - return (util_err(session, ret, "%s: session.truncate", name)); + if ((ret = session->truncate(session, uri, NULL, NULL, NULL)) != 0) + (void)util_err(session, ret, "session.truncate: %s", uri); - free(name); + free(uri); return (ret); } diff --git a/src/utilities/util_upgrade.c b/src/utilities/util_upgrade.c index 63b23f28c16..f89bd46e133 100644 --- a/src/utilities/util_upgrade.c +++ b/src/utilities/util_upgrade.c @@ -15,9 +15,9 @@ util_upgrade(WT_SESSION *session, int argc, char *argv[]) { WT_DECL_RET; int ch; - char *name; + char *uri; - name = NULL; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "")) != EOF) switch (ch) { case '?': @@ -30,25 +30,21 @@ util_upgrade(WT_SESSION *session, int argc, char *argv[]) /* The remaining argument is the table name. */ if (argc != 1) return (usage()); - if ((name = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); - if ((ret = session->upgrade(session, name, NULL)) != 0) { - fprintf(stderr, "%s: upgrade(%s): %s\n", - progname, name, session->strerror(session, ret)); - goto err; + if ((ret = session->upgrade(session, uri, NULL)) != 0) + (void)util_err(session, ret, "session.upgrade: %s", uri); + else { + /* + * Verbose configures a progress counter, move to the next + * line. + */ + if (verbose) + printf("\n"); } - /* Verbose configures a progress counter, move to the next line. 
*/ - if (verbose) - printf("\n"); - - if (0) { -err: ret = 1; - } - - free(name); - + free(uri); return (ret); } diff --git a/src/utilities/util_verify.c b/src/utilities/util_verify.c index 82bdd780cd3..d0587fcfc8c 100644 --- a/src/utilities/util_verify.c +++ b/src/utilities/util_verify.c @@ -17,10 +17,10 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) size_t size; int ch; bool dump_address, dump_blocks, dump_layout, dump_pages; - char *config, *dump_offsets, *name; + char *config, *dump_offsets, *uri; dump_address = dump_blocks = dump_layout = dump_pages = false; - config = dump_offsets = name = NULL; + config = dump_offsets = uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "d:")) != EOF) switch (ch) { case 'd': @@ -55,7 +55,7 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) /* The remaining argument is the table name. */ if (argc != 1) return (usage()); - if ((name = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); /* Build the configuration string as necessary. */ @@ -69,7 +69,7 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) strlen("dump_offsets[],") + (dump_offsets == NULL ? 0 : strlen(dump_offsets)) + 20; if ((config = malloc(size)) == NULL) { - (void)util_err(session, errno, NULL); + ret = util_err(session, errno, NULL); goto err; } snprintf(config, size, @@ -82,23 +82,19 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) dump_offsets != NULL ? "]," : "", dump_pages ? "dump_pages," : ""); } - if ((ret = session->verify(session, name, config)) != 0) { - fprintf(stderr, "%s: verify(%s): %s\n", - progname, name, session->strerror(session, ret)); - goto err; + if ((ret = session->verify(session, uri, config)) != 0) + (void)util_err(session, ret, "session.verify: %s", uri); + else { + /* + * Verbose configures a progress counter, move to the next + * line. 
+ */ + if (verbose) + printf("\n"); } - /* Verbose configures a progress counter, move to the next line. */ - if (verbose) - printf("\n"); - - if (0) { -err: ret = 1; - } - - free(config); - free(name); - +err: free(config); + free(uri); return (ret); } diff --git a/src/utilities/util_write.c b/src/utilities/util_write.c index 7d9bce02b36..b931fad064d 100644 --- a/src/utilities/util_write.c +++ b/src/utilities/util_write.c @@ -18,10 +18,10 @@ util_write(WT_SESSION *session, int argc, char *argv[]) uint64_t recno; int ch; bool append, overwrite, rkey; - const char *uri; - char config[100]; + char *uri, config[100]; append = overwrite = false; + uri = NULL; while ((ch = __wt_getopt(progname, argc, argv, "ao")) != EOF) switch (ch) { case 'a': @@ -47,15 +47,21 @@ util_write(WT_SESSION *session, int argc, char *argv[]) } else if (argc < 3 || ((argc - 1) % 2 != 0)) return (usage()); - if ((uri = util_name(session, *argv, "table")) == NULL) + if ((uri = util_uri(session, *argv, "table")) == NULL) return (1); - /* Open the object. */ + /* + * Open the object; free allocated memory immediately to simplify + * future error handling. + */ (void)snprintf(config, sizeof(config), "%s,%s", append ? "append=true" : "", overwrite ? "overwrite=true" : ""); - if ((ret = session->open_cursor( - session, uri, NULL, config, &cursor)) != 0) - return (util_err(session, ret, "%s: session.open", uri)); + if ((ret = + session->open_cursor(session, uri, NULL, config, &cursor)) != 0) + (void)util_err(session, ret, "%s: session.open_cursor", uri); + free(uri); + if (ret != 0) + return (ret); /* * A simple search only makes sense if the key format is a string or a -- cgit v1.2.1 From 2185e4206c238389665fa024c3f891160942c04d Mon Sep 17 00:00:00 2001 From: sueloverso Date: Mon, 6 Feb 2017 11:29:25 -0500 Subject: WT-3157 More aggressive error handling. (#3275) * More aggressive error handling. * Alternative checkpoint cleanup. 
(#3281) --- src/txn/txn_ckpt.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 90804db3240..59dcc23acc5 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -181,7 +181,7 @@ __checkpoint_apply(WT_SESSION_IMPL *session, const char *cfg[], int (*op)(WT_SESSION_IMPL *, const char *[])) { WT_DECL_RET; - u_int i, j; + u_int i; /* If we have already locked the handles, apply the operation. */ for (i = 0; i < session->ckpt_handle_next; ++i) { @@ -189,22 +189,10 @@ __checkpoint_apply(WT_SESSION_IMPL *session, const char *cfg[], continue; WT_WITH_DHANDLE(session, session->ckpt_handle[i], ret = (*op)(session, cfg)); - WT_ERR(ret); + WT_RET(ret); } -err: - /* - * If we have an error somewhere in processing the handles, then - * we need to mark earlier trees dirty. - */ - if (ret != 0) - for (j = 0; j < i; ++j) { - if (session->ckpt_handle[j] == NULL) - continue; - WT_WITH_DHANDLE(session, session->ckpt_handle[j], - S2BT(session)->modified = true); - } - return (ret); + return (0); } /* @@ -555,7 +543,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) void *saved_meta_next; u_int i; uint64_t fsync_duration_usecs; - bool full, idle, logging, tracking; + bool failed, full, idle, logging, tracking; const char *txn_cfg[] = { WT_CONFIG_BASE(session, WT_SESSION_begin_transaction), "isolation=snapshot", NULL }; @@ -836,12 +824,13 @@ err: /* * overwritten the checkpoint, so what ends up on disk is not * consistent. 
*/ - if (ret != 0) + failed = ret != 0; + if (failed) conn->modified = true; session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED; if (tracking) - WT_TRET(__wt_meta_track_off(session, false, ret != 0)); + WT_TRET(__wt_meta_track_off(session, false, failed)); cache->eviction_scrub_limit = 0.0; WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0); @@ -874,6 +863,13 @@ err: /* for (i = 0; i < session->ckpt_handle_next; ++i) { if (session->ckpt_handle[i] == NULL) continue; + /* + * If the operation failed, mark all trees dirty so they are + * included if a future checkpoint can succeed. + */ + if (failed) + WT_WITH_DHANDLE(session, session->ckpt_handle[i], + S2BT(session)->modified = true); WT_WITH_DHANDLE(session, session->ckpt_handle[i], WT_TRET(__wt_session_release_btree(session))); } -- cgit v1.2.1 From 2a59c1fd79ff98b89046404ccb756114d74fa5f4 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Wed, 8 Feb 2017 01:53:18 -0500 Subject: WT-3161 Panic on a write error in logging. (#3278) It is not possible to continue without risking data loss. 
--- src/log/log.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/log/log.c b/src/log/log.c index 1482cc0aca1..b07ef8c1bd5 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -62,6 +62,8 @@ static int __log_fs_write(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, wt_off_t offset, size_t len, const void *buf) { + WT_DECL_RET; + /* * If we're writing into a new log file, we have to wait for all * writes to the previous log file to complete otherwise there could @@ -71,7 +73,10 @@ __log_fs_write(WT_SESSION_IMPL *session, __log_wait_for_earlier_slot(session, slot); WT_RET(__wt_log_force_sync(session, &slot->slot_release_lsn)); } - return (__wt_write(session, slot->slot_fh, offset, len, buf)); + if ((ret = __wt_write(session, slot->slot_fh, offset, len, buf)) != 0) + WT_PANIC_MSG(session, ret, + "%s: fatal log failure", slot->slot_fh->name); + return (ret); } /* -- cgit v1.2.1 From 15b7658a380e374e627b86e7629c8fad3ef349dc Mon Sep 17 00:00:00 2001 From: sueloverso Date: Wed, 8 Feb 2017 23:25:22 -0500 Subject: WT-3164 Ensure all relevant btree fields are reset on checkpoint error. (#3283) --- src/txn/txn_ckpt.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 59dcc23acc5..5932e058552 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -524,6 +524,17 @@ __checkpoint_verbose_track(WT_SESSION_IMPL *session, #endif } +/* + * __checkpoint_fail_reset -- + * Reset fields when a failure occurs. + */ +static void +__checkpoint_fail_reset(WT_SESSION_IMPL *session) +{ + S2BT(session)->modified = true; + S2BT(session)->ckpt = NULL; +} + /* * __txn_checkpoint -- * Checkpoint a database or a list of objects in the database. 
@@ -869,7 +880,7 @@ err: /* */ if (failed) WT_WITH_DHANDLE(session, session->ckpt_handle[i], - S2BT(session)->modified = true); + __checkpoint_fail_reset(session)); WT_WITH_DHANDLE(session, session->ckpt_handle[i], WT_TRET(__wt_session_release_btree(session))); } -- cgit v1.2.1 From 0b9e4534b2e01a7bf3dec00c91d6f38dfbcc0dd0 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 9 Feb 2017 09:15:15 -0500 Subject: WT-3088 bug: WiredTiger can evict the tree's current eviction walk point (#3280) WT-3088 bug: WiredTiger can evict the tree's current eviction walk point --- src/btree/bt_debug.c | 2 -- src/btree/bt_split.c | 74 +++++++++++++++++++++++++++------------------------- src/include/btmem.h | 8 +++--- src/include/btree.i | 4 +-- src/include/extern.h | 1 + 5 files changed, 47 insertions(+), 42 deletions(-) diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index a89eca230fd..d664da2ebd3 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -699,8 +699,6 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref) WT_RET(ds->f(ds, ", evict-lru")); if (F_ISSET_ATOMIC(page, WT_PAGE_OVERFLOW_KEYS)) WT_RET(ds->f(ds, ", overflow-keys")); - if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_BLOCK)) - WT_RET(ds->f(ds, ", split-block")); if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_INSERT)) WT_RET(ds->f(ds, ", split-insert")); if (F_ISSET_ATOMIC(page, WT_PAGE_UPDATE_IGNORE)) diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 7cfcd08f931..8122d242666 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -53,6 +53,16 @@ __split_oldest_gen(WT_SESSION_IMPL *session) return (oldest); } +/* + * __wt_split_obsolete -- + * Check if it is safe to free / evict based on split generation. + */ +bool +__wt_split_obsolete(WT_SESSION_IMPL *session, uint64_t split_gen) +{ + return (split_gen < __split_oldest_gen(session)); +} + /* * __split_stash_add -- * Add a new entry into the session's split stash list. 
@@ -394,8 +404,8 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, * Prepare a set of WT_REFs for a move. */ static void -__split_ref_step1( - WT_SESSION_IMPL *session, WT_PAGE_INDEX *pindex, bool skip_first) +__split_ref_step1(WT_SESSION_IMPL *session, + WT_PAGE_INDEX *pindex, uint64_t split_gen, bool skip_first) { WT_PAGE *child; WT_REF *child_ref, *ref; @@ -418,30 +428,25 @@ __split_ref_step1( child = ref->page; /* - * Block eviction and splits in newly created pages. + * Block eviction in newly created pages. * * Once the split is live, newly created internal pages might be * evicted and their WT_REF structures freed. If that happened * before all threads exit the index of the page that previously * "owned" the WT_REF, a thread might see a freed WT_REF. To - * ensure that doesn't happen, the newly created page's modify - * structure has a field with a transaction ID that's checked - * before any internal page is evicted. Unfortunately, we don't - * know the correct value until we update the original page's - * index (we need a transaction ID from after that update), but - * the act of updating the original page's index is what allows - * the eviction to happen. + * ensure that doesn't happen, the newly created page contains + * the current split generation and can't be evicted until + * all readers have left the old generation. * - * Split blocking was because historic versions of the split - * code didn't update the WT_REF.home field until after the - * split was live, so the WT_REF.home fields being updated could - * split again before the update, there's a race between splits - * as to which would update them first. The current code updates - * the WT_REF.home fields before going live (in this function), - * this shouldn't be an issue, but for now splits remain turned - * off. 
+ * Historic, we also blocked splits in newly created pages + * because we didn't update the WT_REF.home field until after + * the split was live, so the WT_REF.home fields being updated + * could split again before the update, there's a race between + * splits as to which would update them first. The current code + * updates the WT_REF.home fields before going live (in this + * function), this isn't an issue. */ - F_SET_ATOMIC(child, WT_PAGE_SPLIT_BLOCK); + child->pg_intl_split_gen = split_gen; /* * We use a page flag to prevent the child from splitting from @@ -473,7 +478,6 @@ __split_ref_step2( WT_SESSION_IMPL *session, WT_PAGE_INDEX *pindex, bool skip_first) { WT_DECL_RET; - WT_PAGE *child; WT_REF *ref; uint32_t i; @@ -503,14 +507,9 @@ __split_ref_step2( continue; WT_ERR(ret); - child = ref->page; - - /* The child can now be evicted or split. */ - F_CLR_ATOMIC(child, WT_PAGE_SPLIT_BLOCK); - #ifdef HAVE_DIAGNOSTIC WT_WITH_PAGE_INDEX(session, - __split_verify_intl_key_order(session, child)); + __split_verify_intl_key_order(session, ref->page)); #endif WT_ERR(__wt_hazard_clear(session, ref)); @@ -653,8 +652,12 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) /* Start making real changes to the tree, errors are fatal. */ complete = WT_ERR_PANIC; + /* Get a generation for this split, mark the root page. */ + split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); + root->pg_intl_split_gen = split_gen; + /* Prepare the WT_REFs for the move. */ - __split_ref_step1(session, alloc_index, false); + __split_ref_step1(session, alloc_index, split_gen, false); /* * Confirm the root page's index hasn't moved, then update it, which @@ -686,7 +689,6 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) * fails, we don't roll back that change, because threads may already * be using the new index. 
*/ - split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *); WT_TRET(__split_safe_free(session, split_gen, false, pindex, size)); root_decr += size; @@ -838,6 +840,10 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, /* Start making real changes to the tree, errors are fatal. */ complete = WT_ERR_PANIC; + /* Get a generation for this split, mark the parent page. */ + split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); + parent->pg_intl_split_gen = split_gen; + /* * Confirm the parent page's index hasn't moved then update it, which * makes the split visible to threads descending the tree. @@ -908,7 +914,6 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, * * Acquire a new split generation. */ - split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); for (i = 0, deleted_refs = scr->mem; i < deleted_entries; ++i) { next_ref = pindex->index[deleted_refs[i]]; WT_ASSERT(session, next_ref->state == WT_REF_SPLIT); @@ -1160,8 +1165,12 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) /* Start making real changes to the tree, errors are fatal. */ complete = WT_ERR_PANIC; + /* Get a generation for this split, mark the page. */ + split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); + page->pg_intl_split_gen = split_gen; + /* Prepare the WT_REFs for the move. */ - __split_ref_step1(session, alloc_index, true); + __split_ref_step1(session, alloc_index, split_gen, true); /* Split into the parent. */ WT_ERR(__split_parent(session, page_ref, alloc_index->index, @@ -1207,7 +1216,6 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) * back that change, because threads may already be using the new parent * page. 
*/ - split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *); WT_TRET(__split_safe_free(session, split_gen, false, pindex, size)); page_decr += size; @@ -1284,10 +1292,6 @@ __split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock, for (;;) { parent = ref->home; - /* Skip pages that aren't ready to split. */ - if (F_ISSET_ATOMIC(parent, WT_PAGE_SPLIT_BLOCK)) - return (EBUSY); - if (trylock) WT_RET(__wt_try_writelock(session, &parent->page_lock)); else diff --git a/src/include/btmem.h b/src/include/btmem.h index 43c1a309d52..39ca223aebf 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -483,6 +483,7 @@ struct __wt_page { */ struct { WT_REF *parent_ref; /* Parent reference */ + uint64_t split_gen; /* Generation of last split */ struct __wt_page_index { uint32_t entries; @@ -492,6 +493,8 @@ struct __wt_page { } intl; #undef pg_intl_parent_ref #define pg_intl_parent_ref u.intl.parent_ref +#undef pg_intl_split_gen +#define pg_intl_split_gen u.intl.split_gen /* * Macros to copy/set the index because the name is obscured to ensure @@ -593,9 +596,8 @@ struct __wt_page { #define WT_PAGE_DISK_MAPPED 0x04 /* Disk image in mapped memory */ #define WT_PAGE_EVICT_LRU 0x08 /* Page is on the LRU queue */ #define WT_PAGE_OVERFLOW_KEYS 0x10 /* Page has overflow keys */ -#define WT_PAGE_SPLIT_BLOCK 0x20 /* Split blocking eviction and splits */ -#define WT_PAGE_SPLIT_INSERT 0x40 /* A leaf page was split for append */ -#define WT_PAGE_UPDATE_IGNORE 0x80 /* Ignore updates on page discard */ +#define WT_PAGE_SPLIT_INSERT 0x20 /* A leaf page was split for append */ +#define WT_PAGE_UPDATE_IGNORE 0x40 /* Ignore updates on page discard */ uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */ uint8_t unused[2]; /* Unused padding */ diff --git a/src/include/btree.i b/src/include/btree.i index 378d93dd2ee..315efa86fa6 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1348,8 
+1348,8 @@ __wt_page_can_evict( * discards its WT_REF array, and a thread traversing the original * parent page index might see a freed WT_REF. */ - if (WT_PAGE_IS_INTERNAL(page) && - F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_BLOCK)) + if (WT_PAGE_IS_INTERNAL(page) && !__wt_split_obsolete( + session, page->pg_intl_split_gen)) return (false); /* diff --git a/src/include/extern.h b/src/include/extern.h index 863d2a02861..836a7cb1ae6 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -160,6 +160,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern bool __wt_split_obsolete(WT_SESSION_IMPL *session, uint64_t split_gen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_split_stash_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_split_stash_discard_all( WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -- cgit v1.2.1 From 722b9d1b3da5dbfc4703f41855ae219df3fc6f57 Mon Sep 17 00:00:00 2001 From: Mark Benvenuto Date: Sat, 11 Feb 2017 08:14:52 -0500 Subject: WT-3173 Add runtime detection for s390x CRC32 hardware support (#3290) --- 
src/checksum/power8/crc32_wrapper.c | 4 ++-- src/checksum/zseries/crc32-s390x.c | 26 ++++++++++++++++++++++---- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/checksum/power8/crc32_wrapper.c b/src/checksum/power8/crc32_wrapper.c index ddfa2bdaeb8..a9be9ced1c6 100644 --- a/src/checksum/power8/crc32_wrapper.c +++ b/src/checksum/power8/crc32_wrapper.c @@ -1,4 +1,6 @@ #if defined(__powerpc64__) +#include "wt_internal.h" + #define CRC_TABLE #include "crc32_constants.h" @@ -68,8 +70,6 @@ out: } #endif -#include "wt_internal.h" - /* * __wt_checksum_hw -- * WiredTiger: return a checksum for a chunk of memory. diff --git a/src/checksum/zseries/crc32-s390x.c b/src/checksum/zseries/crc32-s390x.c index f77d6768d42..28b46594220 100644 --- a/src/checksum/zseries/crc32-s390x.c +++ b/src/checksum/zseries/crc32-s390x.c @@ -6,8 +6,20 @@ * Author(s): Hendrik Brueckner * */ +#include "wt_internal.h" + #include #include + +#if defined(HAVE_CRC32_HARDWARE) + +#include + +/* RHEL 7 has kernel support, but does not define this constant in the lib c headers. */ +#ifndef HWCAP_S390_VX +#define HWCAP_S390_VX 2048 +#endif + #include "crc32-s390x.h" #include "slicing-consts.h" @@ -69,8 +81,6 @@ unsigned int __wt_crc32c_le(unsigned int crc, const unsigned char *buf, size_t l /* Main CRC-32 functions */ DEFINE_CRC32_VX(__wt_crc32c_le_vx, __wt_crc32c_le_vgfm_16, __wt_crc32c_le) -#include "wt_internal.h" - /* * __wt_checksum_hw -- * WiredTiger: return a checksum for a chunk of memory. @@ -81,6 +91,8 @@ __wt_checksum_hw(const void *chunk, size_t len) return (~__wt_crc32c_le_vx(0xffffffff, chunk, len)); } +#endif + /* * __wt_checksum_init -- * WiredTiger: detect CRC hardware and set the checksum function. 
@@ -89,8 +101,14 @@ void __wt_checksum_init(void) { #if defined(HAVE_CRC32_HARDWARE) - __wt_process.checksum = __wt_checksum_hw; -#else + unsigned long caps = getauxval(AT_HWCAP); + + if (caps & HWCAP_S390_VX) + __wt_process.checksum = __wt_checksum_hw; + else + __wt_process.checksum = __wt_checksum_sw; + +#else /* !HAVE_CRC32_HARDWARE */ __wt_process.checksum = __wt_checksum_sw; #endif } -- cgit v1.2.1 From 7f5d0f9981214c723f2ed90cf4533887ed406176 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Mon, 13 Feb 2017 10:49:24 +1100 Subject: WT-3170 Change when eviction walk point is saved, cleanup splits. (#3284) * Change how eviction walk point is saved during walk. * After 0b9e453, we no longer need to do any non-DIAGNOSTIC work after completing the split (previously, we had changes to make the newly created split pages evictable, but now they are initially given a generation number which will prevent their eviction until it's OK). Rename __split_ref_step2() to be __split_verify_intl(), and change it to verify all of the internal pages involved in the split. Previously, we only verified the pages we had to read and update anyway. Now we don't have to update any pages and we're only reading pages in DIAGNOSTIC mode, verify all of them. Don't release the hazard pointer explicitly, use the more standard __wt_page_release() call (it should make no difference, it's just a bit more consistent). Rename __split_ref_step1() to be __split_ref_prepare(), there's no longer a step #2. * We don't need to publish WT_BTREE.evict_ref, or use a barrier: in one case we're guaranteed that only the writing thread will check the assertion in the discard code (that we're not discarding the eviction's reference), and in the other case we're doing hazard-pointer coupling, which implies there is a barrier in the code path before the page can possibly be discarded by any thread. * Review barriers use in splits. (#3288).
In all cases, use the pattern "Update the page index, which includes a barrier to make the split live, switch to benign error mode, then verify the pages involved in the split are correct." --- src/btree/bt_split.c | 164 ++++++++++++++++++++-------------------------- src/btree/bt_walk.c | 4 +- src/evict/evict_lru.c | 49 +++++++++----- src/include/session.h | 2 - src/session/session_api.c | 5 -- 5 files changed, 103 insertions(+), 121 deletions(-) diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 8122d242666..fcb14be7c76 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -197,7 +197,7 @@ __split_safe_free(WT_SESSION_IMPL *session, #ifdef HAVE_DIAGNOSTIC /* * __split_verify_intl_key_order -- - * Verify the key order on an internal page after a split, diagnostic only. + * Verify the key order on an internal page after a split. */ static void __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page) @@ -249,6 +249,42 @@ __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page) break; } } + +/* + * __split_verify_intl -- + * Verify a set of internal pages involved in a split. + */ +static int +__split_verify_intl(WT_SESSION_IMPL *session, + WT_PAGE *page1, WT_PAGE *page2, WT_PAGE *pindex_page, bool skip_first) +{ + WT_DECL_RET; + WT_REF *ref; + + /* The split is complete and live, verify all of the pages involved. */ + if (page1 != NULL) + __split_verify_intl_key_order(session, page1); + if (page2 != NULL) + __split_verify_intl_key_order(session, page2); + + /* Skip the first slot on non-root internal pages, it's not set. */ + WT_INTL_FOREACH_BEGIN(session, pindex_page, ref) { + if (skip_first) { + skip_first = false; + continue; + } + WT_ERR(__wt_page_in(session, ref, WT_READ_NO_EVICT)); + + __split_verify_intl_key_order(session, ref->page); + + WT_ERR(__wt_page_release(session, ref, WT_READ_NO_EVICT)); + } WT_INTL_FOREACH_END; + + return (0); + +err: /* Something really bad just happened. 
*/ + WT_PANIC_RET(session, ret, "fatal error during page split"); +} #endif /* @@ -400,11 +436,11 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, } /* - * __split_ref_step1 -- + * __split_ref_prepare -- * Prepare a set of WT_REFs for a move. */ static void -__split_ref_step1(WT_SESSION_IMPL *session, +__split_ref_prepare(WT_SESSION_IMPL *session, WT_PAGE_INDEX *pindex, uint64_t split_gen, bool skip_first) { WT_PAGE *child; @@ -469,58 +505,6 @@ __split_ref_step1(WT_SESSION_IMPL *session, } } -/* - * __split_ref_step2 -- - * Allow the newly created children to be evicted or split. - */ -static int -__split_ref_step2( - WT_SESSION_IMPL *session, WT_PAGE_INDEX *pindex, bool skip_first) -{ - WT_DECL_RET; - WT_REF *ref; - uint32_t i; - - /* - * The split has gone live, enable eviction and splits on the newly - * created internal pages. - */ - WT_WRITE_BARRIER(); - - for (i = skip_first ? 1 : 0; i < pindex->entries; ++i) { - ref = pindex->index[i]; - - /* - * We don't hold hazard pointers on created pages, they cannot - * be evicted because the page-modify transaction value set as - * they were created prevents eviction. (See above, we reset - * that value as part of fixing up the page.) But, an eviction - * thread might be attempting to evict the page (the WT_REF may - * be WT_REF_LOCKED), or it may be a disk based page (the WT_REF - * may be WT_REF_READING), or it may be in some other state. - * Acquire a hazard pointer for any in-memory pages so we know - * the state of the page. Ignore pages not in-memory (deleted, - * on-disk, being read), there's no in-memory structure to fix. - */ - if ((ret = __wt_page_in(session, - ref, WT_READ_CACHE | WT_READ_NO_EVICT)) == WT_NOTFOUND) - continue; - WT_ERR(ret); - -#ifdef HAVE_DIAGNOSTIC - WT_WITH_PAGE_INDEX(session, - __split_verify_intl_key_order(session, ref->page)); -#endif - - WT_ERR(__wt_hazard_clear(session, ref)); - } - - return (0); - -err: /* Something really bad just happened. 
*/ - WT_PANIC_RET(session, ret, "fatal error resolving a split"); -} - /* * __split_root -- * Split the root page in-memory, deepening the tree. @@ -657,7 +641,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) root->pg_intl_split_gen = split_gen; /* Prepare the WT_REFs for the move. */ - __split_ref_step1(session, alloc_index, split_gen, false); + __split_ref_prepare(session, alloc_index, split_gen, false); /* * Confirm the root page's index hasn't moved, then update it, which @@ -665,19 +649,16 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) */ WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(root) == pindex); WT_INTL_INDEX_SET(root, alloc_index); - -#ifdef HAVE_DIAGNOSTIC - WT_WITH_PAGE_INDEX(session, - __split_verify_intl_key_order(session, root)); -#endif - /* Finalize the WT_REFs we moved. */ - WT_ERR(__split_ref_step2(session, alloc_index, false)); + alloc_index = NULL; /* The split is complete and correct, ignore benign errors. */ complete = WT_ERR_IGNORE; - /* We've installed the allocated page-index, ensure error handling. */ - alloc_index = NULL; +#ifdef HAVE_DIAGNOSTIC + WT_WITH_PAGE_INDEX(session, + ret = __split_verify_intl(session, root, NULL, root, false)); + WT_ERR(ret); +#endif /* * We can't free the previous root's index, there may be threads using @@ -852,11 +833,6 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, WT_INTL_INDEX_SET(parent, alloc_index); alloc_index = NULL; -#ifdef HAVE_DIAGNOSTIC - WT_WITH_PAGE_INDEX(session, - __split_verify_intl_key_order(session, parent)); -#endif - /* * If discarding the page's original WT_REF field, reset it to split. * Threads cursoring through the tree were blocked because that WT_REF @@ -875,18 +851,27 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, __wt_free(session, ref->page_del); } + /* + * Set the discarded WT_REF state to split, ensuring we don't + * race with any discard of the WT_REF deleted fields. 
+ */ WT_PUBLISH(ref->state, WT_REF_SPLIT); - } - /* - * Push out the changes: not required for correctness, but don't let - * threads spin on incorrect page references longer than necessary. - */ - WT_FULL_BARRIER(); + /* + * Push out the change: not required for correctness, but stops + * threads spinning on incorrect page references. + */ + WT_FULL_BARRIER(); + } /* The split is complete and correct, ignore benign errors. */ complete = WT_ERR_IGNORE; +#ifdef HAVE_DIAGNOSTIC + WT_WITH_PAGE_INDEX(session, + __split_verify_intl_key_order(session, parent)); +#endif + /* * !!! * Swapping in the new page index released the page for eviction, we can @@ -1170,34 +1155,27 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) page->pg_intl_split_gen = split_gen; /* Prepare the WT_REFs for the move. */ - __split_ref_step1(session, alloc_index, split_gen, true); + __split_ref_prepare(session, alloc_index, split_gen, true); /* Split into the parent. */ WT_ERR(__split_parent(session, page_ref, alloc_index->index, alloc_index->entries, parent_incr, false, false)); - /* Confirm the page's index hasn't moved, then update it. */ + /* + * Confirm the page's index hasn't moved, then update it, which makes + * the split visible to threads descending the tree. + */ WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(page) == pindex); WT_INTL_INDEX_SET(page, replace_index); -#ifdef HAVE_DIAGNOSTIC - WT_WITH_PAGE_INDEX(session, - __split_verify_intl_key_order(session, parent)); - WT_WITH_PAGE_INDEX(session, - __split_verify_intl_key_order(session, page)); -#endif - - /* Finalize the WT_REFs we moved. */ - WT_ERR(__split_ref_step2(session, alloc_index, true)); - /* The split is complete and correct, ignore benign errors. */ complete = WT_ERR_IGNORE; - /* - * Push out the changes: not required for correctness, but no reason - * to wait. 
- */ - WT_FULL_BARRIER(); +#ifdef HAVE_DIAGNOSTIC + WT_WITH_PAGE_INDEX(session, + ret = __split_verify_intl(session, parent, page, page, true)); + WT_ERR(ret); +#endif /* * We don't care about the page-index we allocated, all we needed was diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c index 049700952ee..ddaa2e5f70b 100644 --- a/src/btree/bt_walk.c +++ b/src/btree/bt_walk.c @@ -340,9 +340,7 @@ __tree_walk_internal(WT_SESSION_IMPL *session, * Take a copy of any held page and clear the return value. Remember * the hazard pointer we're currently holding. * - * We may be passed a pointer to btree->evict_page that we are clearing - * here. We check when discarding pages that we're not discarding that - * page, so this clear must be done before the page is released. + * Clear the returned value, it makes future error handling easier. */ couple = couple_orig = ref = *refp; *refp = NULL; diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index db39a5acdee..efe056aee02 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -756,7 +756,7 @@ __evict_pass(WT_SESSION_IMPL *session) * Clear a single walk point. */ static int -__evict_clear_walk(WT_SESSION_IMPL *session, bool count_stat) +__evict_clear_walk(WT_SESSION_IMPL *session) { WT_BTREE *btree; WT_CACHE *cache; @@ -773,14 +773,14 @@ __evict_clear_walk(WT_SESSION_IMPL *session, bool count_stat) if ((ref = btree->evict_ref) == NULL) return (0); - if (count_stat) - WT_STAT_CONN_INCR(session, cache_eviction_walks_abandoned); + WT_STAT_CONN_INCR(session, cache_eviction_walks_abandoned); /* - * Clear evict_ref first, in case releasing it forces eviction (we - * assert we never try to evict the current eviction walk point). + * Clear evict_ref before releasing it in case that forces eviction (we + * assert that we never try to evict the current eviction walk point). 
*/ btree->evict_ref = NULL; + WT_WITH_DHANDLE(cache->walk_session, session->dhandle, (ret = __wt_page_release(cache->walk_session, ref, WT_READ_NO_EVICT))); @@ -803,7 +803,7 @@ __evict_clear_all_walks(WT_SESSION_IMPL *session) TAILQ_FOREACH(dhandle, &conn->dhqh, q) if (WT_PREFIX_MATCH(dhandle->name, "file:")) WT_WITH_DHANDLE(session, dhandle, - WT_TRET(__evict_clear_walk(session, true))); + WT_TRET(__evict_clear_walk(session))); return (ret); } @@ -848,7 +848,7 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) /* Clear any existing LRU eviction walk for the file. */ WT_WITH_PASS_LOCK(session, - ret = __evict_clear_walk(session, true)); + ret = __evict_clear_walk(session)); (void)__wt_atomic_subv32(&cache->pass_intr, 1); WT_ERR(ret); @@ -1662,8 +1662,15 @@ __evict_walk_file(WT_SESSION_IMPL *session, FLD_SET(walk_flags, WT_READ_PREV); /* - * Get some more eviction candidate pages. - * + * Get some more eviction candidate pages, starting at the last saved + * point. Clear the saved point immediately, we assert when discarding + * pages we're not discarding an eviction point, so this clear must be + * complete before the page is released. + */ + ref = btree->evict_ref; + btree->evict_ref = NULL; + + /* * !!! Take care terminating this loop. * * Don't make an extra call to __wt_tree_walk after we hit the end of a @@ -1676,7 +1683,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, for (evict = start, pages_queued = pages_seen = refs_walked = 0; evict < end && (ret == 0 || ret == WT_NOTFOUND); ret = __wt_tree_walk_count( - session, &btree->evict_ref, &refs_walked, walk_flags)) { + session, &ref, &refs_walked, walk_flags)) { /* * Check whether we're finding a good ratio of candidates vs * pages seen. 
Some workloads create "deserts" in trees where @@ -1690,7 +1697,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, if (give_up) break; - if ((ref = btree->evict_ref) == NULL) { + if (ref == NULL) { if (++restarts == 2) break; WT_STAT_CONN_INCR( @@ -1812,6 +1819,8 @@ fast: /* If the page can't be evicted, give up. */ btree->evict_walk_period /= 2; /* + * Give up the walk occasionally. + * * If we happen to end up on the root page or a page requiring urgent * eviction, clear it. We have to track hazard pointers, and the root * page complicates that calculation. @@ -1823,16 +1832,20 @@ fast: /* If the page can't be evicted, give up. */ * If we land on a page requiring forced eviction, move on to the next * page: we want this page evicted as quickly as possible. */ - if ((ref = btree->evict_ref) != NULL) { - /* Give up the walk occasionally. */ + if (ref != NULL) { if (__wt_ref_is_root(ref) || evict == start || give_up || ref->page->read_gen == WT_READGEN_OLDEST || - ref->page->memory_footprint >= btree->splitmempage) - WT_RET(__evict_clear_walk(session, restarts == 0)); - else if (ref->page->read_gen == WT_READGEN_OLDEST) + ref->page->memory_footprint >= btree->splitmempage) { + if (restarts == 0) + WT_STAT_CONN_INCR( + session, cache_eviction_walks_abandoned); + WT_RET(__wt_page_release(cache->walk_session, + ref, WT_READ_NO_EVICT)); + ref = NULL; + } else if (ref->page->read_gen == WT_READGEN_OLDEST) WT_RET_NOTFOUND_OK(__wt_tree_walk_count( - session, &btree->evict_ref, - &refs_walked, walk_flags)); + session, &ref, &refs_walked, walk_flags)); + btree->evict_ref = ref; } WT_STAT_CONN_INCRV(session, cache_eviction_walk, refs_walked); diff --git a/src/include/session.h b/src/include/session.h index 7dd523aea26..085f871a34f 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -52,8 +52,6 @@ struct __wt_session_impl { const char *lastop; /* Last operation */ uint32_t id; /* UID, offset in session array */ - WT_CONDVAR *cond; /* Condition variable */ - 
WT_EVENT_HANDLER *event_handler;/* Application's event handlers */ WT_DATA_HANDLE *dhandle; /* Current data handle */ diff --git a/src/session/session_api.c b/src/session/session_api.c index 71626e098cb..3a5d06f1b61 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -234,9 +234,6 @@ __session_close(WT_SESSION *wt_session, const char *config) /* Release common session resources. */ WT_TRET(__wt_session_release_resources(session)); - /* Destroy the thread's mutex. */ - WT_TRET(__wt_cond_destroy(session, &session->cond)); - /* The API lock protects opening and closing of sessions. */ __wt_spin_lock(session, &conn->api_lock); @@ -1837,8 +1834,6 @@ __open_session(WT_CONNECTION_IMPL *conn, session_ret->name = NULL; session_ret->id = i; - WT_ERR(__wt_cond_alloc(session, "session", &session_ret->cond)); - if (WT_SESSION_FIRST_USE(session_ret)) __wt_random_init(&session_ret->rnd); -- cgit v1.2.1 From a8fe04026ef55b8f59df24ff75ae151c7c370e2a Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Sun, 12 Feb 2017 20:13:24 -0500 Subject: WT-3135 WT-3159 Fix search_near() with custom collators for index keys of variable length. (#3254) * For checkpoint logging, use a format that ends in 'u' to be compatible with previously created log files. In previous WT versions, these formats end in 'U', and a final 'U' does not have a prefixed size. Now, a 'U' in any position has a prefixed size.
--- dist/s_string.ok | 1 + dist/s_void | 4 + src/cursor/cur_index.c | 25 +- src/include/packing.i | 7 +- src/txn/txn_log.c | 4 +- test/csuite/Makefile.am | 3 + test/csuite/wt3135_search_near_collator/main.c | 360 +++++++++++++++++++++++++ 7 files changed, 398 insertions(+), 6 deletions(-) create mode 100644 test/csuite/wt3135_search_near_collator/main.c diff --git a/dist/s_string.ok b/dist/s_string.ok index bb0cacd9d5d..d2e9dffaa48 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -1182,6 +1182,7 @@ txt typedef uB uS +ui uint uintmax unbare diff --git a/dist/s_void b/dist/s_void index 4a6b4ad91a2..947153e730b 100755 --- a/dist/s_void +++ b/dist/s_void @@ -87,6 +87,10 @@ func_ok() -e '/int handle_progress$/d' \ -e '/int helium_cursor_reset$/d' \ -e '/int helium_session_verify$/d' \ + -e '/int index_compare_primary$/d' \ + -e '/int index_compare_S$/d' \ + -e '/int index_compare_u$/d' \ + -e '/int index_extractor_u$/d' \ -e '/int log_print_err$/d' \ -e '/int lz4_error$/d' \ -e '/int lz4_pre_size$/d' \ diff --git a/src/cursor/cur_index.c b/src/cursor/cur_index.c index 4786b0524bc..13180efdea4 100644 --- a/src/cursor/cur_index.c +++ b/src/cursor/cur_index.c @@ -240,7 +240,16 @@ __curindex_search(WT_CURSOR *cursor) found_key = child->key; if (found_key.size < cursor->key.size) WT_ERR(WT_NOTFOUND); - found_key.size = cursor->key.size; + + /* + * Custom collators expect to see complete keys, pass an item containing + * all the visible fields so it unpacks correctly. + */ + if (cindex->index->collator != NULL) + WT_ERR(__wt_struct_repack(session, child->key_format, + cindex->iface.key_format, &child->key, &found_key)); + else + found_key.size = cursor->key.size; WT_ERR(__wt_compare( session, cindex->index->collator, &cursor->key, &found_key, &cmp)); @@ -307,8 +316,18 @@ __curindex_search_near(WT_CURSOR *cursor, int *exact) * so we flip the sign of the result to match what callers expect. 
*/ found_key = child->key; - if (found_key.size > cursor->key.size) - found_key.size = cursor->key.size; + if (found_key.size > cursor->key.size) { + /* + * Custom collators expect to see complete keys, pass an item + * containing all the visible fields so it unpacks correctly. + */ + if (cindex->index->collator != NULL) + WT_ERR(__wt_struct_repack(session, + cindex->child->key_format, cindex->iface.key_format, + &child->key, &found_key)); + else + found_key.size = cursor->key.size; + } WT_ERR(__wt_compare( session, cindex->index->collator, &cursor->key, &found_key, exact)); diff --git a/src/include/packing.i b/src/include/packing.i index 17ca261bcfc..8ba3dd536ac 100644 --- a/src/include/packing.i +++ b/src/include/packing.i @@ -168,10 +168,15 @@ next: if (pack->cur == pack->end) (int)(pack->end - pack->orig), pack->orig); return (0); case 'u': - case 'U': /* Special case for items with a size prefix. */ pv->type = (!pv->havesize && *pack->cur != '\0') ? 'U' : 'u'; return (0); + case 'U': + /* + * Don't change the type. 'U' is used internally, so this type + * was already changed to explicitly include the size. 
+ */ + return (0); case 'b': case 'h': case 'i': diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c index 7ad295f421b..2931dc1ce82 100644 --- a/src/txn/txn_log.c +++ b/src/txn/txn_log.c @@ -269,7 +269,7 @@ __wt_txn_checkpoint_logread(WT_SESSION_IMPL *session, WT_ITEM ckpt_snapshot_unused; uint32_t ckpt_file, ckpt_offset; u_int ckpt_nsnapshot_unused; - const char *fmt = WT_UNCHECKED_STRING(IIIU); + const char *fmt = WT_UNCHECKED_STRING(IIIu); if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt, &ckpt_file, &ckpt_offset, @@ -297,7 +297,7 @@ __wt_txn_checkpoint_log( uint8_t *end, *p; size_t recsize; uint32_t i, rectype = WT_LOGREC_CHECKPOINT; - const char *fmt = WT_UNCHECKED_STRING(IIIIU); + const char *fmt = WT_UNCHECKED_STRING(IIIIu); txn = &session->txn; ckpt_lsn = &txn->ckpt_lsn; diff --git a/test/csuite/Makefile.am b/test/csuite/Makefile.am index bcdbf120d67..5167b42b433 100644 --- a/test/csuite/Makefile.am +++ b/test/csuite/Makefile.am @@ -43,6 +43,9 @@ noinst_PROGRAMS += test_wt2999_join_extractor test_wt3120_filesys_SOURCES = wt3120_filesys/main.c noinst_PROGRAMS += test_wt3120_filesys +test_wt3135_search_near_collator_SOURCES = wt3135_search_near_collator/main.c +noinst_PROGRAMS += test_wt3135_search_near_collator + # Run this during a "make check" smoke test. TESTS = $(noinst_PROGRAMS) LOG_COMPILER = $(TEST_WRAPPER) diff --git a/test/csuite/wt3135_search_near_collator/main.c b/test/csuite/wt3135_search_near_collator/main.c new file mode 100644 index 00000000000..3113d29dfa9 --- /dev/null +++ b/test/csuite/wt3135_search_near_collator/main.c @@ -0,0 +1,360 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. 
+ * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +/* + * JIRA ticket reference: WT-3135 + * Test case description: Each set of data is ordered and contains + * five elements (0-4). We insert elements 1 and 3, and then do + * search_near and search for each element. For each set of data, we perform + * these tests first using a custom collator, and second using a custom collator + * and extractor. In each case there are index keys having variable length. + * Failure mode: In the reported test case, the custom compare routine is + * given a truncated key to compare, and the unpack functions return errors + * because the truncation appeared in the middle of a key. 
+ */ + +#define TEST_ENTRY_COUNT 5 +typedef const char *TEST_SET[TEST_ENTRY_COUNT]; +static TEST_SET test_sets[] = { + { "0", "01", "012", "0123", "01234" }, + { "A", "B", "C", "D", "E" }, + { "5", "54", "543", "5432", "54321" }, + { "54321", "5433", "544", "55", "6" } +}; +#define TEST_SET_COUNT (sizeof(test_sets) / sizeof(test_sets[0])) + +static bool +item_str_equal(WT_ITEM *item, const char *str) +{ + return (item->size == strlen(str) + 1 && strncmp((char *)item->data, + str, item->size) == 0); +} + +static int +compare_int(int a, int b) +{ + return (a < b ? -1 : (a > b ? 1 : 0)); +} + +static int +index_compare_primary(WT_PACK_STREAM *s1, WT_PACK_STREAM *s2, int *cmp) +{ + int64_t pkey1, pkey2; + int rc1, rc2; + + rc1 = wiredtiger_unpack_int(s1, &pkey1); + rc2 = wiredtiger_unpack_int(s2, &pkey2); + + if (rc1 == 0 && rc2 == 0) + *cmp = compare_int(pkey1, pkey2); + else if (rc1 != 0 && rc2 != 0) + *cmp = 0; + else if (rc1 != 0) + *cmp = -1; + else + *cmp = 1; + return (0); +} + +static int +index_compare_S(WT_COLLATOR *collator, WT_SESSION *session, + const WT_ITEM *key1, const WT_ITEM *key2, int *cmp) +{ + WT_PACK_STREAM *s1, *s2; + const char *skey1, *skey2; + + (void)collator; + + testutil_check(wiredtiger_unpack_start(session, "Si", key1->data, + key1->size, &s1)); + testutil_check(wiredtiger_unpack_start(session, "Si", key2->data, + key2->size, &s2)); + + testutil_check(wiredtiger_unpack_str(s1, &skey1)); + testutil_check(wiredtiger_unpack_str(s2, &skey2)); + + if ((*cmp = strcmp(skey1, skey2)) == 0) + testutil_check(index_compare_primary(s1, s2, cmp)); + + testutil_check(wiredtiger_pack_close(s1, NULL)); + testutil_check(wiredtiger_pack_close(s2, NULL)); + + return (0); +} + +static int +index_compare_u(WT_COLLATOR *collator, WT_SESSION *session, + const WT_ITEM *key1, const WT_ITEM *key2, int *cmp) +{ + WT_ITEM skey1, skey2; + WT_PACK_STREAM *s1, *s2; + + (void)collator; + + testutil_check(wiredtiger_unpack_start(session, "ui", key1->data, + key1->size, 
&s1)); + testutil_check(wiredtiger_unpack_start(session, "ui", key2->data, + key2->size, &s2)); + + testutil_check(wiredtiger_unpack_item(s1, &skey1)); + testutil_check(wiredtiger_unpack_item(s2, &skey2)); + + if ((*cmp = strcmp(skey1.data, skey2.data)) == 0) + testutil_check(index_compare_primary(s1, s2, cmp)); + + testutil_check(wiredtiger_pack_close(s1, NULL)); + testutil_check(wiredtiger_pack_close(s2, NULL)); + + return (0); +} + +static int +index_extractor_u(WT_EXTRACTOR *extractor, WT_SESSION *session, + const WT_ITEM *key, const WT_ITEM *value, WT_CURSOR *result_cursor) +{ + (void)extractor; + (void)session; + (void)key; + + result_cursor->set_key(result_cursor, value); + return result_cursor->insert(result_cursor); +} + +static WT_COLLATOR collator_S = { index_compare_S, NULL, NULL }; +static WT_COLLATOR collator_u = { index_compare_u, NULL, NULL }; +static WT_EXTRACTOR extractor_u = { index_extractor_u, NULL, NULL }; + +/* + * Check search() and search_near() using the test string indicated + * by test_index. 
+ */ +static void +search_using_str(WT_CURSOR *cursor, TEST_SET test_set, int test_index) +{ + int exact, ret; + const char *result; + const char *str_01, *str_0123, *test_str; + + testutil_assert(test_index >= 0 && test_index <= 4); + str_01 = test_set[1]; + str_0123 = test_set[3]; + test_str = test_set[test_index]; + + cursor->set_key(cursor, test_str); + testutil_check(cursor->search_near(cursor, &exact)); + testutil_check(cursor->get_key(cursor, &result)); + + if (test_index == 0) + testutil_assert(strcmp(result, str_01) == 0 && exact > 0); + else if (test_index == 1) + testutil_assert(strcmp(result, str_01) == 0 && exact == 0); + else if (test_index == 2) + testutil_assert((strcmp(result, str_0123) == 0 && exact > 0) || + (strcmp(result, str_01) == 0 && exact < 0)); + else if (test_index == 3) + testutil_assert(strcmp(result, str_0123) == 0 && exact == 0); + else if (test_index == 4) + testutil_assert(strcmp(result, str_0123) == 0 && exact < 0); + + cursor->set_key(cursor, test_str); + ret = cursor->search(cursor); + + if (test_index == 0 || test_index == 2 || test_index == 4) + testutil_assert(ret == WT_NOTFOUND); + else if (test_index == 1 || test_index == 3) + testutil_assert(ret == 0); +} + +/* + * Check search() and search_near() using the test string indicated + * by test_index against a table containing a variable sized item. 
+ */ +static void +search_using_item(WT_CURSOR *cursor, TEST_SET test_set, int test_index) +{ + WT_ITEM item; + size_t testlen; + int exact, ret; + const char *str_01, *str_0123, *test_str; + + testutil_assert(test_index >= 0 && test_index <= 4); + str_01 = test_set[1]; + str_0123 = test_set[3]; + test_str = test_set[test_index]; + + testlen = strlen(test_str) + 1; + item.data = test_str; + item.size = testlen; + cursor->set_key(cursor, &item); + testutil_check(cursor->search_near(cursor, &exact)); + testutil_check(cursor->get_key(cursor, &item)); + + if (test_index == 0) + testutil_assert(item_str_equal(&item, str_01) && exact > 0); + else if (test_index == 1) + testutil_assert(item_str_equal(&item, str_01) && exact == 0); + else if (test_index == 2) + testutil_assert((item_str_equal(&item, str_0123) && exact > 0) + || (item_str_equal(&item, str_01) && exact < 0)); + else if (test_index == 3) + testutil_assert(item_str_equal(&item, str_0123) && exact == 0); + else if (test_index == 4) + testutil_assert(item_str_equal(&item, str_0123) && exact < 0); + + item.data = test_str; + item.size = testlen; + cursor->set_key(cursor, &item); + ret = cursor->search(cursor); + + if (test_index == 0 || test_index == 2 || test_index == 4) + testutil_assert(ret == WT_NOTFOUND); + else if (test_index == 1 || test_index == 3) + testutil_assert(ret == 0); +} + +/* + * For each set of data, perform tests. + */ +static void +test_one_set(WT_SESSION *session, TEST_SET set) +{ + WT_CURSOR *cursor; + WT_ITEM item; + int32_t i; + + /* + * Part 1: Using a custom collator, insert some elements + * and verify results from search_near. + */ + + testutil_check(session->create(session, + "table:main", "key_format=i,value_format=S,columns=(k,v)")); + testutil_check(session->create(session, + "index:main:def_collator", "columns=(v)")); + testutil_check(session->create(session, + "index:main:custom_collator", + "columns=(v),collator=collator_S")); + + /* Insert only elements #1 and #3. 
*/ + testutil_check(session->open_cursor(session, + "table:main", NULL, NULL, &cursor)); + cursor->set_key(cursor, 0); + cursor->set_value(cursor, set[1]); + testutil_check(cursor->insert(cursor)); + cursor->set_key(cursor, 1); + cursor->set_value(cursor, set[3]); + testutil_check(cursor->insert(cursor)); + testutil_check(cursor->close(cursor)); + + /* Check all elements in def_collator index. */ + testutil_check(session->open_cursor(session, + "index:main:def_collator", NULL, NULL, &cursor)); + for (i = 0; i < (int32_t)TEST_ENTRY_COUNT; i++) + search_using_str(cursor, set, i); + testutil_check(cursor->close(cursor)); + + /* Check all elements in custom_collator index */ + testutil_check(session->open_cursor(session, + "index:main:custom_collator", NULL, NULL, &cursor)); + for (i = 0; i < (int32_t)TEST_ENTRY_COUNT; i++) + search_using_str(cursor, set, i); + testutil_check(cursor->close(cursor)); + + /* + * Part 2: perform the same checks using a custom collator and + * extractor. + */ + testutil_check(session->create(session, + "table:main2", "key_format=i,value_format=u,columns=(k,v)")); + + testutil_check(session->create(session, "index:main2:idx_w_coll", + "key_format=u,collator=collator_u,extractor=extractor_u")); + + testutil_check(session->open_cursor(session, + "table:main2", NULL, NULL, &cursor)); + + memset(&item, 0, sizeof(item)); + item.size = strlen(set[1]) + 1; + item.data = set[1]; + cursor->set_key(cursor, 1); + cursor->set_value(cursor, &item); + testutil_check(cursor->insert(cursor)); + + item.size = strlen(set[3]) + 1; + item.data = set[3]; + cursor->set_key(cursor, 3); + cursor->set_value(cursor, &item); + testutil_check(cursor->insert(cursor)); + + testutil_check(cursor->close(cursor)); + + testutil_check(session->open_cursor(session, + "index:main2:idx_w_coll", NULL, NULL, &cursor)); + for (i = 0; i < (int32_t)TEST_ENTRY_COUNT; i++) + search_using_item(cursor, set, i); + testutil_check(cursor->close(cursor)); + + 
testutil_check(session->drop(session, "table:main", NULL)); + testutil_check(session->drop(session, "table:main2", NULL)); +} + +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + WT_SESSION *session; + int32_t i; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + testutil_check(wiredtiger_open(opts->home, NULL, "create", + &opts->conn)); + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + + /* Add any collators and extractors used by tests */ + testutil_check(opts->conn->add_collator(opts->conn, "collator_S", + &collator_S, NULL)); + testutil_check(opts->conn->add_collator(opts->conn, "collator_u", + &collator_u, NULL)); + testutil_check(opts->conn->add_extractor(opts->conn, "extractor_u", + &extractor_u, NULL)); + + for (i = 0; i < (int32_t)TEST_SET_COUNT; i++) { + printf("test set %d\n", i); + test_one_set(session, test_sets[i]); + } + + testutil_check(session->close(session, NULL)); + testutil_cleanup(opts); + return (EXIT_SUCCESS); +} -- cgit v1.2.1 From 2258dac42020b486b78947d434fde72c236d1e48 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 13 Feb 2017 10:02:37 -0500 Subject: WT-3174 Coverity/lint cleanup (#3293) * WT-3174 Coverity/lint cleanup clang38 complaints: wt3135_search_near_collator/main.c:75:22: error: implicit conversion loses integer precision: 'int64_t' (aka 'long') to 'int' [-Werror,-Wshorten-64-to-32] *cmp = compare_int(pkey1, pkey2); ~~~~~~~~~~~ ^~~~~ wt3135_search_near_collator/main.c:75:29: error: implicit conversion loses integer precision: 'int64_t' (aka 'long') to 'int' [-Werror,-Wshorten-64-to-32] *cmp = compare_int(pkey1, pkey2); ~~~~~~~~~~~ ^~~~~ * Coverity complains in __split_root(): dead_error_condition: The switch value complete cannot be WT_ERR_PANIC. CID 1371132 (#1 of 1): Logically dead code (DEADCODE) dead_error_begin: Execution cannot reach this statement: case WT_ERR_PANIC:. 
Revert a minor part of 7f5d0f9, don't switch to benign error mode (setting WT_ERR_IGNORE) until after the split has been verified in DIAGNOSTIC mode. That makes sense and should make Coverity happy. * Fix type-casting, sizeof()/sizeof() is a size_t. --- src/btree/bt_split.c | 18 +++++++++--------- test/csuite/wt3135_search_near_collator/main.c | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index fcb14be7c76..3142e52be0d 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -651,15 +651,15 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) WT_INTL_INDEX_SET(root, alloc_index); alloc_index = NULL; - /* The split is complete and correct, ignore benign errors. */ - complete = WT_ERR_IGNORE; - #ifdef HAVE_DIAGNOSTIC WT_WITH_PAGE_INDEX(session, ret = __split_verify_intl(session, root, NULL, root, false)); WT_ERR(ret); #endif + /* The split is complete and verified, ignore benign errors. */ + complete = WT_ERR_IGNORE; + /* * We can't free the previous root's index, there may be threads using * it. Add to the session's discard list, to be freed once we know no @@ -864,14 +864,14 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, WT_FULL_BARRIER(); } - /* The split is complete and correct, ignore benign errors. */ - complete = WT_ERR_IGNORE; - #ifdef HAVE_DIAGNOSTIC WT_WITH_PAGE_INDEX(session, __split_verify_intl_key_order(session, parent)); #endif + /* The split is complete and verified, ignore benign errors. */ + complete = WT_ERR_IGNORE; + /* * !!! * Swapping in the new page index released the page for eviction, we can @@ -1168,15 +1168,15 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(page) == pindex); WT_INTL_INDEX_SET(page, replace_index); - /* The split is complete and correct, ignore benign errors. 
*/ - complete = WT_ERR_IGNORE; - #ifdef HAVE_DIAGNOSTIC WT_WITH_PAGE_INDEX(session, ret = __split_verify_intl(session, parent, page, page, true)); WT_ERR(ret); #endif + /* The split is complete and verified, ignore benign errors. */ + complete = WT_ERR_IGNORE; + /* * We don't care about the page-index we allocated, all we needed was * the array of WT_REF structures, which has now been split into the diff --git a/test/csuite/wt3135_search_near_collator/main.c b/test/csuite/wt3135_search_near_collator/main.c index 3113d29dfa9..8783034a7d8 100644 --- a/test/csuite/wt3135_search_near_collator/main.c +++ b/test/csuite/wt3135_search_near_collator/main.c @@ -57,7 +57,7 @@ item_str_equal(WT_ITEM *item, const char *str) } static int -compare_int(int a, int b) +compare_int(int64_t a, int64_t b) { return (a < b ? -1 : (a > b ? 1 : 0)); } @@ -329,7 +329,7 @@ main(int argc, char *argv[]) { TEST_OPTS *opts, _opts; WT_SESSION *session; - int32_t i; + size_t i; opts = &_opts; memset(opts, 0, sizeof(*opts)); @@ -349,8 +349,8 @@ main(int argc, char *argv[]) testutil_check(opts->conn->add_extractor(opts->conn, "extractor_u", &extractor_u, NULL)); - for (i = 0; i < (int32_t)TEST_SET_COUNT; i++) { - printf("test set %d\n", i); + for (i = 0; i < TEST_SET_COUNT; i++) { + printf("test set %" WT_SIZET_FMT "\n", i); test_one_set(session, test_sets[i]); } -- cgit v1.2.1 From dc33b134ea0e231fd87924c6a50e6f8230a7c6bf Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Tue, 14 Feb 2017 14:22:10 +1100 Subject: WT-3175 Don't verify children during splits up the tree. (#3294) Reverts part of 7f5d0f9981214c723f2ed90cf4533887ed406176. Fixes a deadlock in diagnostic mode. Also revert a change that could cause diagnostic code to read pages into cache: we don't want diagnostic adding cache pressure and we already verify pages as they are evicted. 
--- src/btree/bt_split.c | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 3142e52be0d..45550ff627f 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -251,29 +251,33 @@ __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page) } /* - * __split_verify_intl -- - * Verify a set of internal pages involved in a split. + * __split_verify_root -- + * Verify a root page involved in a split. */ static int -__split_verify_intl(WT_SESSION_IMPL *session, - WT_PAGE *page1, WT_PAGE *page2, WT_PAGE *pindex_page, bool skip_first) +__split_verify_root(WT_SESSION_IMPL *session, WT_PAGE *page) { WT_DECL_RET; WT_REF *ref; /* The split is complete and live, verify all of the pages involved. */ - if (page1 != NULL) - __split_verify_intl_key_order(session, page1); - if (page2 != NULL) - __split_verify_intl_key_order(session, page2); - - /* Skip the first slot on non-root internal pages, it's not set. */ - WT_INTL_FOREACH_BEGIN(session, pindex_page, ref) { - if (skip_first) { - skip_first = false; + __split_verify_intl_key_order(session, page); + + WT_INTL_FOREACH_BEGIN(session, page, ref) { + /* + * An eviction thread might be attempting to evict the page + * (the WT_REF may be WT_REF_LOCKED), or it may be a disk based + * page (the WT_REF may be WT_REF_READING), or it may be in + * some other state. Acquire a hazard pointer for any + * in-memory pages so we know the state of the page. + * + * Ignore pages not in-memory (deleted, on-disk, being read), + * there's no in-memory structure to check. 
+ */ + if ((ret = __wt_page_in(session, + ref, WT_READ_CACHE | WT_READ_NO_EVICT)) == WT_NOTFOUND) continue; - } - WT_ERR(__wt_page_in(session, ref, WT_READ_NO_EVICT)); + WT_ERR(ret); __split_verify_intl_key_order(session, ref->page); @@ -653,7 +657,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) #ifdef HAVE_DIAGNOSTIC WT_WITH_PAGE_INDEX(session, - ret = __split_verify_intl(session, root, NULL, root, false)); + ret = __split_verify_root(session, root)); WT_ERR(ret); #endif @@ -1170,8 +1174,9 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) #ifdef HAVE_DIAGNOSTIC WT_WITH_PAGE_INDEX(session, - ret = __split_verify_intl(session, parent, page, page, true)); - WT_ERR(ret); + __split_verify_intl_key_order(session, parent)); + WT_WITH_PAGE_INDEX(session, + __split_verify_intl_key_order(session, page)); #endif /* The split is complete and verified, ignore benign errors. */ -- cgit v1.2.1 From 5b16ddd3815fb043061ac35151e277b919d7e463 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Tue, 14 Feb 2017 15:32:43 +1100 Subject: WT-3152 Switch the table lock to a rwlock. 
(#3291) --- dist/flags.py | 3 +- dist/s_define.list | 2 + src/conn/conn_handle.c | 4 +- src/cursor/cur_table.c | 2 +- src/include/connection.h | 2 +- src/include/flags.h | 23 +++++----- src/include/schema.h | 104 +++++++++++++++++++++++++++++++++------------- src/schema/schema_list.c | 2 +- src/session/session_api.c | 15 +++---- src/txn/txn_ckpt.c | 2 +- 10 files changed, 106 insertions(+), 53 deletions(-) diff --git a/dist/flags.py b/dist/flags.py index 216f7c29e0a..b20a7181532 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -123,7 +123,8 @@ flags = { 'SESSION_LOCKED_PASS', 'SESSION_LOCKED_SCHEMA', 'SESSION_LOCKED_SLOT', - 'SESSION_LOCKED_TABLE', + 'SESSION_LOCKED_TABLE_READ', + 'SESSION_LOCKED_TABLE_WRITE', 'SESSION_LOCKED_TURTLE', 'SESSION_LOGGING_INMEM', 'SESSION_LOOKASIDE_CURSOR', diff --git a/dist/s_define.list b/dist/s_define.list index 53a3df87615..8911d888077 100644 --- a/dist/s_define.list +++ b/dist/s_define.list @@ -39,6 +39,8 @@ WT_PADDING_CHECK WT_READ_BARRIER WT_REF_SIZE WT_SESSION_LOCKED_CHECKPOINT +WT_SESSION_LOCKED_TABLE_READ +WT_SESSION_LOCKED_TABLE_WRITE WT_SESSION_LOCKED_TURTLE WT_SIZE_CHECK WT_STATS_FIELD_TO_OFFSET diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index 4f8d89fa9d2..287e9ca7b99 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -59,12 +59,12 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_SPIN_INIT_TRACKED(session, &conn->metadata_lock, metadata); WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure")); WT_SPIN_INIT_TRACKED(session, &conn->schema_lock, schema); - WT_SPIN_INIT_TRACKED(session, &conn->table_lock, table); WT_RET(__wt_spin_init(session, &conn->turtle_lock, "turtle file")); /* Read-write locks */ __wt_rwlock_init(session, &conn->dhandle_lock); __wt_rwlock_init(session, &conn->hot_backup_lock); + __wt_rwlock_init(session, &conn->table_lock); WT_RET(__wt_calloc_def(session, WT_PAGE_LOCKS, &conn->page_lock)); for (i = 0; i < WT_PAGE_LOCKS; ++i) @@ -142,7 +142,7 @@ 
__wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_spin_destroy(session, &conn->metadata_lock); __wt_spin_destroy(session, &conn->reconfig_lock); __wt_spin_destroy(session, &conn->schema_lock); - __wt_spin_destroy(session, &conn->table_lock); + __wt_rwlock_destroy(session, &conn->table_lock); __wt_spin_destroy(session, &conn->turtle_lock); for (i = 0; i < WT_PAGE_LOCKS; ++i) __wt_spin_destroy(session, &conn->page_lock[i]); diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c index 76f7fc5865f..7e8cd153d2d 100644 --- a/src/cursor/cur_table.c +++ b/src/cursor/cur_table.c @@ -769,7 +769,7 @@ __curtable_complete(WT_SESSION_IMPL *session, WT_TABLE *table) return (0); /* If the table is incomplete, wait on the table lock and recheck. */ - WT_WITH_TABLE_LOCK(session, complete = table->cg_complete); + WT_WITH_TABLE_READ_LOCK(session, complete = table->cg_complete); if (!complete) WT_RET_MSG(session, EINVAL, "'%s' not available until all column groups are created", diff --git a/src/include/connection.h b/src/include/connection.h index 3a719e59608..ce483d3291a 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -171,7 +171,7 @@ struct __wt_connection_impl { WT_SPINLOCK metadata_lock; /* Metadata update spinlock */ WT_SPINLOCK reconfig_lock; /* Single thread reconfigure */ WT_SPINLOCK schema_lock; /* Schema operation spinlock */ - WT_SPINLOCK table_lock; /* Table creation spinlock */ + WT_RWLOCK table_lock; /* Table list lock */ WT_SPINLOCK turtle_lock; /* Turtle file spinlock */ WT_RWLOCK dhandle_lock; /* Data handle list lock */ diff --git a/src/include/flags.h b/src/include/flags.h index 5219bf33ed6..c1fff920e3b 100644 --- a/src/include/flags.h +++ b/src/include/flags.h @@ -59,17 +59,18 @@ #define WT_SESSION_LOCKED_PASS 0x00000040 #define WT_SESSION_LOCKED_SCHEMA 0x00000080 #define WT_SESSION_LOCKED_SLOT 0x00000100 -#define WT_SESSION_LOCKED_TABLE 0x00000200 -#define WT_SESSION_LOCKED_TURTLE 0x00000400 -#define WT_SESSION_LOGGING_INMEM 
0x00000800 -#define WT_SESSION_LOOKASIDE_CURSOR 0x00001000 -#define WT_SESSION_NO_CACHE 0x00002000 -#define WT_SESSION_NO_DATA_HANDLES 0x00004000 -#define WT_SESSION_NO_EVICTION 0x00008000 -#define WT_SESSION_NO_LOGGING 0x00010000 -#define WT_SESSION_NO_SCHEMA_LOCK 0x00020000 -#define WT_SESSION_QUIET_CORRUPT_FILE 0x00040000 -#define WT_SESSION_SERVER_ASYNC 0x00080000 +#define WT_SESSION_LOCKED_TABLE_READ 0x00000200 +#define WT_SESSION_LOCKED_TABLE_WRITE 0x00000400 +#define WT_SESSION_LOCKED_TURTLE 0x00000800 +#define WT_SESSION_LOGGING_INMEM 0x00001000 +#define WT_SESSION_LOOKASIDE_CURSOR 0x00002000 +#define WT_SESSION_NO_CACHE 0x00004000 +#define WT_SESSION_NO_DATA_HANDLES 0x00008000 +#define WT_SESSION_NO_EVICTION 0x00010000 +#define WT_SESSION_NO_LOGGING 0x00020000 +#define WT_SESSION_NO_SCHEMA_LOCK 0x00040000 +#define WT_SESSION_QUIET_CORRUPT_FILE 0x00080000 +#define WT_SESSION_SERVER_ASYNC 0x00100000 #define WT_STAT_CLEAR 0x00000001 #define WT_STAT_JSON 0x00000002 #define WT_STAT_ON_CLOSE 0x00000004 diff --git a/src/include/schema.h b/src/include/schema.h index fff57951c0e..9a6e1e54e80 100644 --- a/src/include/schema.h +++ b/src/include/schema.h @@ -78,10 +78,13 @@ struct __wt_table { */ #define WT_COLGROUPS(t) WT_MAX((t)->ncolgroups, 1) -/* Make it simple to check a generic locked state on the handle list lock */ +/* Helpers for the locked state of the handle list and table locks. 
*/ #define WT_SESSION_LOCKED_HANDLE_LIST \ (WT_SESSION_LOCKED_HANDLE_LIST_READ | \ WT_SESSION_LOCKED_HANDLE_LIST_WRITE) +#define WT_SESSION_LOCKED_TABLE \ + (WT_SESSION_LOCKED_TABLE_READ | \ + WT_SESSION_LOCKED_TABLE_WRITE) /* * WT_WITH_LOCK_WAIT -- @@ -90,7 +93,7 @@ struct __wt_table { #define WT_WITH_LOCK_WAIT(session, lock, flag, op) do { \ if (F_ISSET(session, (flag))) { \ op; \ - } else { \ + } else { \ __wt_spin_lock_track(session, lock); \ F_SET(session, (flag)); \ op; \ @@ -139,7 +142,7 @@ struct __wt_table { #define WT_WITH_HANDLE_LIST_READ_LOCK(session, op) do { \ if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)) { \ op; \ - } else { \ + } else { \ __wt_readlock(session, &S2C(session)->dhandle_lock); \ F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ op; \ @@ -150,15 +153,14 @@ struct __wt_table { /* * WT_WITH_HANDLE_LIST_WRITE_LOCK -- - * Acquire the data handle list lock in shared mode, perform an operation, - * drop the lock. The handle list lock is a read-write lock so the - * implementation is different to the other lock macros. - * Automatically upgrade from a read lock if held. + * Acquire the data handle list lock in exclusive mode, perform an + * operation, drop the lock. The handle list lock is a read-write lock so + * the implementation is different to the other lock macros. */ #define WT_WITH_HANDLE_LIST_WRITE_LOCK(session, op) do { \ if (F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)) { \ op; \ - } else { \ + } else { \ WT_ASSERT(session, \ !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ));\ __wt_writelock(session, &S2C(session)->dhandle_lock); \ @@ -201,22 +203,58 @@ struct __wt_table { } while (0) /* - * WT_WITH_TABLE_LOCK, WT_WITH_TABLE_LOCK_NOWAIT -- + * WT_WITH_TABLE_READ_LOCK, WT_WITH_TABLE_WRITE_LOCK, + * WT_WITH_TABLE_WRITE_LOCK_NOWAIT -- * Acquire the table lock, perform an operation, drop the lock. + * The table lock is a read-write lock so the implementation is different + * to most other lock macros. 
+ * + * Note: readlock always waits because some operations need the table lock + * to discard handles, and we only expect it to be held across short + * operations. */ -#define WT_WITH_TABLE_LOCK(session, op) do { \ - WT_ASSERT(session, \ - F_ISSET(session, WT_SESSION_LOCKED_TABLE) || \ - !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \ - WT_WITH_LOCK_WAIT(session, \ - &S2C(session)->table_lock, WT_SESSION_LOCKED_TABLE, op); \ +#define WT_WITH_TABLE_READ_LOCK(session, op) do { \ + if (F_ISSET(session, WT_SESSION_LOCKED_TABLE)) { \ + op; \ + } else { \ + WT_ASSERT(session, \ + !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \ + __wt_readlock(session, &S2C(session)->table_lock); \ + F_SET(session, WT_SESSION_LOCKED_TABLE_READ); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_TABLE_READ); \ + __wt_readunlock(session, &S2C(session)->table_lock); \ + } \ } while (0) -#define WT_WITH_TABLE_LOCK_NOWAIT(session, ret, op) do { \ + +#define WT_WITH_TABLE_WRITE_LOCK(session, op) do { \ + if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \ + op; \ + } else { \ + WT_ASSERT(session, \ + !F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ | \ + WT_SESSION_LOCKED_HANDLE_LIST)); \ + __wt_writelock(session, &S2C(session)->table_lock); \ + F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + __wt_writeunlock(session, &S2C(session)->table_lock); \ + } \ +} while (0) +#define WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret, op) do { \ WT_ASSERT(session, \ - F_ISSET(session, WT_SESSION_LOCKED_TABLE) || \ - !F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST)); \ - WT_WITH_LOCK_NOWAIT(session, ret, \ - &S2C(session)->table_lock, WT_SESSION_LOCKED_TABLE, op); \ + F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE) || \ + !F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ | \ + WT_SESSION_LOCKED_HANDLE_LIST)); \ + if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \ + op; \ + } else if ((ret = __wt_try_writelock(session, \ + 
&S2C(session)->table_lock)) == 0) { \ + F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + op; \ + F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + __wt_writeunlock(session, &S2C(session)->table_lock); \ + } \ } while (0) /* @@ -232,8 +270,10 @@ struct __wt_table { F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ bool __handle_write_locked = \ F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ - bool __table_locked = \ - F_ISSET(session, WT_SESSION_LOCKED_TABLE); \ + bool __table_read_locked = \ + F_ISSET(session, WT_SESSION_LOCKED_TABLE_READ); \ + bool __table_write_locked = \ + F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ bool __schema_locked = \ F_ISSET(session, WT_SESSION_LOCKED_SCHEMA); \ if (__handle_read_locked) { \ @@ -244,9 +284,13 @@ struct __wt_table { F_CLR(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ __wt_writeunlock(session, &__conn->dhandle_lock); \ } \ - if (__table_locked) { \ - F_CLR(session, WT_SESSION_LOCKED_TABLE); \ - __wt_spin_unlock(session, &__conn->table_lock); \ + if (__table_read_locked) { \ + F_CLR(session, WT_SESSION_LOCKED_TABLE_READ); \ + __wt_readunlock(session, &__conn->table_lock); \ + } \ + if (__table_write_locked) { \ + F_CLR(session, WT_SESSION_LOCKED_TABLE_WRITE); \ + __wt_writeunlock(session, &__conn->table_lock); \ } \ if (__schema_locked) { \ F_CLR(session, WT_SESSION_LOCKED_SCHEMA); \ @@ -265,9 +309,13 @@ struct __wt_table { __wt_spin_lock(session, &__conn->schema_lock); \ F_SET(session, WT_SESSION_LOCKED_SCHEMA); \ } \ - if (__table_locked) { \ - __wt_spin_lock(session, &__conn->table_lock); \ - F_SET(session, WT_SESSION_LOCKED_TABLE); \ + if (__table_read_locked) { \ + __wt_readlock(session, &__conn->table_lock); \ + F_SET(session, WT_SESSION_LOCKED_TABLE_READ); \ + } \ + if (__table_write_locked) { \ + __wt_writelock(session, &__conn->table_lock); \ + F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ } \ if (__handle_read_locked) { \ __wt_readlock(session, &__conn->dhandle_lock); \ diff --git 
a/src/schema/schema_list.c b/src/schema/schema_list.c index ea7374b7554..74ef5135a4a 100644 --- a/src/schema/schema_list.c +++ b/src/schema/schema_list.c @@ -25,7 +25,7 @@ __schema_add_table(WT_SESSION_IMPL *session, /* Make sure the metadata is open before getting other locks. */ WT_RET(__wt_metadata_cursor(session, NULL)); - WT_WITH_TABLE_LOCK(session, + WT_WITH_TABLE_READ_LOCK(session, ret = __wt_schema_open_table( session, name, namelen, ok_incomplete, &table)); WT_RET(ret); diff --git a/src/session/session_api.c b/src/session/session_api.c index 3a5d06f1b61..d282c5d0c32 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -162,7 +162,7 @@ __session_alter(WT_SESSION *wt_session, const char *uri, const char *config) cfg[1] = NULL; WT_WITH_CHECKPOINT_LOCK(session, WT_WITH_SCHEMA_LOCK(session, - WT_WITH_TABLE_LOCK(session, + WT_WITH_TABLE_WRITE_LOCK(session, ret = __wt_schema_alter(session, uri, cfg)))); err: if (ret != 0) @@ -518,7 +518,7 @@ __wt_session_create( WT_DECL_RET; WT_WITH_SCHEMA_LOCK(session, - WT_WITH_TABLE_LOCK(session, + WT_WITH_TABLE_WRITE_LOCK(session, ret = __wt_schema_create(session, uri, config))); return (ret); } @@ -766,7 +766,7 @@ __session_rename(WT_SESSION *wt_session, WT_WITH_CHECKPOINT_LOCK(session, WT_WITH_SCHEMA_LOCK(session, - WT_WITH_TABLE_LOCK(session, + WT_WITH_TABLE_WRITE_LOCK(session, ret = __wt_schema_rename(session, uri, newuri, cfg)))); err: if (ret != 0) @@ -855,21 +855,22 @@ __session_drop(WT_SESSION *wt_session, const char *uri, const char *config) if (lock_wait) WT_WITH_CHECKPOINT_LOCK(session, WT_WITH_SCHEMA_LOCK(session, - WT_WITH_TABLE_LOCK(session, ret = + WT_WITH_TABLE_WRITE_LOCK(session, ret = __wt_schema_drop(session, uri, cfg)))); else WT_WITH_CHECKPOINT_LOCK_NOWAIT(session, ret, WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret, - WT_WITH_TABLE_LOCK_NOWAIT(session, ret, ret = + WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret, + ret = __wt_schema_drop(session, uri, cfg)))); } else { if (lock_wait) 
WT_WITH_SCHEMA_LOCK(session, - WT_WITH_TABLE_LOCK(session, + WT_WITH_TABLE_WRITE_LOCK(session, ret = __wt_schema_drop(session, uri, cfg))); else WT_WITH_SCHEMA_LOCK_NOWAIT(session, ret, - WT_WITH_TABLE_LOCK_NOWAIT(session, ret, + WT_WITH_TABLE_WRITE_LOCK_NOWAIT(session, ret, ret = __wt_schema_drop(session, uri, cfg))); } diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 5932e058552..3261c8089f4 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -650,7 +650,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) */ WT_ASSERT(session, session->ckpt_handle_next == 0); WT_WITH_SCHEMA_LOCK(session, - WT_WITH_TABLE_LOCK(session, + WT_WITH_TABLE_READ_LOCK(session, ret = __checkpoint_apply_all( session, cfg, __wt_checkpoint_get_handles, NULL))); WT_ERR(ret); -- cgit v1.2.1 From 988c297f22bbce3a40f7eb9ed22cdb7d9bf0a9c8 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 13 Feb 2017 23:44:11 -0500 Subject: WT-3144 bug fix: random cursor returns not-found when descending to an empty page (#3289) * If random descent through the tree fails, fallback to skipping through the tree's pages; if skipping through the tree's pages fails, fallback to a random entry from the first page in the tree that contains anything at all. * Add tests that create a tree with enough data for multiple pages, reopens the connection so we have a real tree, then truncates most / all of the tree and makes sure random lookups find data / fail (respectively). That way we're testing WT_REF_DELETED, not just empty pages. * Fix a documentation error, we never implemented a next_random_sample_percent configuration. 
--- src/btree/bt_cursor.c | 134 ++++++++++++++++++++++++++------------- src/btree/row_srch.c | 14 ++-- src/docs/cursor-random.dox | 5 -- test/suite/test_cursor_random.py | 49 ++++++++++++++ 4 files changed, 144 insertions(+), 58 deletions(-) diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index d18b9b76992..c0b028725c7 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -846,7 +846,7 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_SESSION_IMPL *session; WT_UPDATE *upd; wt_off_t size; - uint64_t skip; + uint64_t n, skip; session = (WT_SESSION_IMPL *)cbt->iface.session; btree = cbt->btree; @@ -862,60 +862,104 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_STAT_CONN_INCR(session, cursor_next); WT_STAT_DATA_INCR(session, cursor_next); +#ifdef HAVE_DIAGNOSTIC /* - * If retrieving random values without sampling, or we don't have a - * page reference, pick a roughly random leaf page in the tree. + * Under some conditions we end up using the underlying cursor.next to + * walk through the object. Since there are multiple calls, we can hit + * the cursor-order checks, turn them off. */ - if (cbt->ref == NULL || cbt->next_random_sample_size == 0) { - /* - * Skip past the sample size of the leaf pages in the tree - * between each random key return to compensate for unbalanced - * trees. - * - * Use the underlying file size divided by its block allocation - * size as our guess of leaf pages in the file (this can be - * entirely wrong, as it depends on how many pages are in this - * particular checkpoint, how large the leaf and internal pages - * really are, and other factors). Then, divide that value by - * the configured sample size and increment the final result to - * make sure tiny files don't leave us with a skip value of 0. - * - * !!! - * Ideally, the number would be prime to avoid restart issues. 
- */ - if (cbt->next_random_sample_size != 0) { - WT_ERR(btree->bm->size(btree->bm, session, &size)); - cbt->next_random_leaf_skip = (uint64_t) - ((size / btree->allocsize) / - cbt->next_random_sample_size) + 1; - } + __wt_cursor_key_order_reset(cbt); +#endif - /* - * Choose a leaf page from the tree. - */ + /* + * If we don't have a current position in the tree, or if retrieving + * random values without sampling, pick a roughly random leaf page in + * the tree and return an entry from it. + */ + if (cbt->ref == NULL || cbt->next_random_sample_size == 0) { WT_ERR(__cursor_func_init(cbt, true)); WT_WITH_PAGE_INDEX( session, ret = __wt_row_random_descent(session, cbt)); - WT_ERR(ret); - } else { + if (ret == 0) + goto random_page_entry; + /* - * Read through the tree, skipping leaf pages. Be cautious about - * the skip count: if the last leaf page skipped was also the - * last leaf page in the tree, it may be set to zero on return - * with the end-of-walk condition. - * - * Pages read for data sampling aren't "useful"; don't update - * the read generation of pages already in memory, and if a page - * is read, set its generation to a low value so it is evicted - * quickly. + * Random descent may return not-found: the tree might be empty + * or have so many deleted items we didn't find any valid pages. + * We can't return WT_NOTFOUND to the application unless a tree + * is really empty, fallback to skipping through tree pages. */ - for (skip = - cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;) - WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip, - WT_READ_NO_GEN | - WT_READ_SKIP_INTL | WT_READ_WONT_NEED)); + WT_ERR_NOTFOUND_OK(ret); + } + + /* + * Cursor through the tree, skipping past the sample size of the leaf + * pages in the tree between each random key return to compensate for + * unbalanced trees. + * + * If the random descent attempt failed, we don't have a configured + * sample size, use 100 for no particular reason. 
+ */ + if (cbt->next_random_sample_size == 0) + cbt->next_random_sample_size = 100; + + /* + * If the random descent attempt failed, or it's our first skip attempt, + * we haven't yet set the pages to skip, do it now. + * + * Use the underlying file size divided by its block allocation size as + * our guess of leaf pages in the file (this can be entirely wrong, as + * it depends on how many pages are in this particular checkpoint, how + * large the leaf and internal pages really are, and other factors). + * Then, divide that value by the configured sample size and increment + * the final result to make sure tiny files don't leave us with a skip + * value of 0. + * + * !!! + * Ideally, the number would be prime to avoid restart issues. + */ + if (cbt->next_random_leaf_skip == 0) { + WT_ERR(btree->bm->size(btree->bm, session, &size)); + cbt->next_random_leaf_skip = (uint64_t) + ((size / btree->allocsize) / + cbt->next_random_sample_size) + 1; + } + + /* + * Be paranoid about loop termination: first, if the last leaf page + * skipped was also the last leaf page in the tree, skip may be set to + * zero on return along with the NULL WT_REF end-of-walk condition. + * Second, if a tree has no valid pages at all (the condition after + * initial creation), we might make no progress at all, or finally, if + * a tree has only deleted pages, we'll make progress, but never get a + * useful WT_REF. And, of course, the tree can switch from one of these + * states to another without warning. Decrement skip regardless of what + * is happening in the search, guarantee we eventually quit. + * + * Pages read for data sampling aren't "useful"; don't update the read + * generation of pages already in memory, and if a page is read, set + * its generation to a low value so it is evicted quickly. 
+ */ + for (skip = cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;) { + n = skip; + WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip, + WT_READ_NO_GEN | WT_READ_SKIP_INTL | WT_READ_WONT_NEED)); + if (n == skip) { + if (skip == 0) + break; + --skip; + } } + /* + * We can't return WT_NOTFOUND to the application unless a tree is + * really empty, fallback to a random entry from the first page in the + * tree that has anything at all. + */ + if (cbt->ref == NULL) + WT_ERR(__wt_btcur_next(cbt, false)); + +random_page_entry: /* * Select a random entry from the leaf page. If it's not valid, move to * the next entry, if that doesn't work, move to the previous entry. diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c index 1c3d5ad5daa..0858e42356b 100644 --- a/src/btree/row_srch.c +++ b/src/btree/row_srch.c @@ -798,14 +798,7 @@ __wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) current = NULL; retry = 100; - if (0) { -restart: /* - * Discard the currently held page and restart the search from - * the root. - */ - WT_RET(__wt_page_release(session, current, 0)); - } - +restart: /* Search the internal pages of the tree. */ current = &btree->root; for (;;) { @@ -837,6 +830,11 @@ restart: /* break; } if (i == entries || descent == NULL) { + /* + * Discard the currently held page and restart from the + * root. + */ + WT_RET(__wt_page_release(session, current, 0)); if (--retry > 0) goto restart; return (WT_NOTFOUND); diff --git a/src/docs/cursor-random.dox b/src/docs/cursor-random.dox index a0a3212be6d..b6434e3d161 100644 --- a/src/docs/cursor-random.dox +++ b/src/docs/cursor-random.dox @@ -20,9 +20,4 @@ cursor configured using \c next_random_sample_size divides the object into \c next_random_sample_size pieces, and each subsequent retrieval returns a record from the next one of those pieces. 
-For example, setting \c next_random_sample_percent to \c 10 would cause -the cursor to sequentially return records from each tenth part of the -object. Setting \c next_random_sample_percent to \c 1000 would cause the -cursor to sequentially return records from each .1% of the object. - */ diff --git a/test/suite/test_cursor_random.py b/test/suite/test_cursor_random.py index 3bda6dc9946..ee0f85a29ee 100644 --- a/test/suite/test_cursor_random.py +++ b/test/suite/test_cursor_random.py @@ -71,6 +71,15 @@ class test_cursor_random(wttest.WiredTigerTestCase): self.assertEquals(cursor.reset(), 0) cursor.close() + # Check that next_random fails with an empty tree, repeatedly. + def test_cursor_random_empty(self): + uri = self.type + self.session.create(uri, 'key_format=S,value_format=S') + cursor = self.session.open_cursor(uri, None, self.config) + for i in range(1,5): + self.assertTrue(cursor.next(), wiredtiger.WT_NOTFOUND) + cursor.close + # Check that next_random works with a single value, repeatedly. def test_cursor_random_single_record(self): uri = self.type @@ -127,6 +136,46 @@ class test_cursor_random(wttest.WiredTigerTestCase): def test_cursor_random_multiple_page_records(self): self.cursor_random_multiple_page_records(0) + # Check that next_random fails in the presence of a set of values, some of + # which are deleted. + def test_cursor_random_deleted_partial(self): + uri = self.type + ds = self.dataset(self, uri, 10000, + config='allocation_size=512,leaf_page_max=512') + ds.populate() + + # Close the connection so everything is forced to disk. 
+ self.reopen_conn() + + start = self.session.open_cursor(uri, None) + start.set_key(ds.key(10)) + end = self.session.open_cursor(uri, None) + end.set_key(ds.key(10000-10)) + self.session.truncate(None, start, end, None) + self.assertEqual(start.close(), 0) + self.assertEqual(end.close(), 0) + + cursor = self.session.open_cursor(uri, None, self.config) + for i in range(1,10): + self.assertEqual(cursor.next(), 0) + + # Check that next_random fails in the presence of a set of values, all of + # which are deleted. + def test_cursor_random_deleted_all(self): + uri = self.type + ds = self.dataset(self, uri, 10000, + config='allocation_size=512,leaf_page_max=512') + ds.populate() + + # Close the connection so everything is forced to disk. + self.reopen_conn() + + self.session.truncate(uri, None, None, None) + + cursor = self.session.open_cursor(uri, None, self.config) + for i in range(1,10): + self.assertTrue(cursor.next(), wiredtiger.WT_NOTFOUND) + # Check that opening a random cursor on column-store returns not-supported. class test_cursor_random_column(wttest.WiredTigerTestCase): scenarios = make_scenarios([ -- cgit v1.2.1 From df64d277ae99adf98824fbf2118626c77fd2f199 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Tue, 14 Feb 2017 16:39:24 +1100 Subject: WT-3149 Have eviction choose a random point when walking a tree. (#3285) Only choose a random point when there is no saved walk point. Fixes to random search as well - noticed search termination conditions when sampling the search page vs. walking it sequentially weren't the same. Changed that, which caused the test_compact02 test to fail. There's an underlying bug in this code, if we return WT_NOTFOUND, we can lose a hazard pointer on the page of the tree we unsuccessfully searched. Add a page-release in the case of returning not-found.
--- dist/filelist | 1 + src/btree/bt_cursor.c | 180 ++-------------------- src/btree/bt_random.c | 413 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/btree/row_srch.c | 237 ----------------------------- src/evict/evict_lru.c | 28 +++- src/include/extern.h | 7 +- 6 files changed, 454 insertions(+), 412 deletions(-) create mode 100644 src/btree/bt_random.c diff --git a/dist/filelist b/dist/filelist index 13d67ef961b..3886035eaa9 100644 --- a/dist/filelist +++ b/dist/filelist @@ -30,6 +30,7 @@ src/btree/bt_io.c src/btree/bt_misc.c src/btree/bt_ovfl.c src/btree/bt_page.c +src/btree/bt_random.c src/btree/bt_read.c src/btree/bt_rebalance.c src/btree/bt_ret.c diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index c0b028725c7..5fde2237538 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -76,11 +76,11 @@ __cursor_fix_implicit(WT_BTREE *btree, WT_CURSOR_BTREE *cbt) } /* - * __cursor_valid -- + * __wt_cursor_valid -- * Return if the cursor references an valid key/value pair. */ -static inline bool -__cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) +bool +__wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) { WT_BTREE *btree; WT_CELL *cell; @@ -330,7 +330,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, cbt->ref, false) : __cursor_col_search(session, cbt, cbt->ref)); - valid = cbt->compare == 0 && __cursor_valid(cbt, &upd); + valid = cbt->compare == 0 && __wt_cursor_valid(cbt, &upd); } if (!valid) { WT_ERR(__cursor_func_init(cbt, true)); @@ -338,7 +338,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_ERR(btree->type == BTREE_ROW ? 
__cursor_row_search(session, cbt, NULL, false) : __cursor_col_search(session, cbt, NULL)); - valid = cbt->compare == 0 && __cursor_valid(cbt, &upd); + valid = cbt->compare == 0 && __wt_cursor_valid(cbt, &upd); } if (valid) @@ -419,14 +419,14 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) * Ignore those cases, it makes things too complicated. */ if (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1) - valid = __cursor_valid(cbt, &upd); + valid = __wt_cursor_valid(cbt, &upd); } if (!valid) { WT_ERR(__cursor_func_init(cbt, true)); WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, NULL, true) : __cursor_col_search(session, cbt, NULL)); - valid = __cursor_valid(cbt, &upd); + valid = __wt_cursor_valid(cbt, &upd); } /* @@ -462,7 +462,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, NULL, true) : __cursor_col_search(session, cbt, NULL)); - if (__cursor_valid(cbt, &upd)) { + if (__wt_cursor_valid(cbt, &upd)) { exact = cbt->compare; ret = __wt_kv_return(session, cbt, upd); } else if ((ret = __wt_btcur_prev(cbt, false)) != WT_NOTFOUND) @@ -537,7 +537,7 @@ retry: WT_RET(__cursor_func_init(cbt, true)); * Fail in that case, the record exists. */ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && - ((cbt->compare == 0 && __cursor_valid(cbt, NULL)) || + ((cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) || (cbt->compare != 0 && __cursor_fix_implicit(btree, cbt)))) WT_ERR(WT_DUPLICATE_KEY); @@ -552,7 +552,7 @@ retry: WT_RET(__cursor_func_init(cbt, true)); * key/value pair. */ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && - cbt->compare == 0 && __cursor_valid(cbt, NULL)) + cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) WT_ERR(WT_DUPLICATE_KEY); ret = __cursor_row_modify(session, cbt, false); @@ -682,12 +682,12 @@ retry: WT_RET(__cursor_func_init(cbt, true)); /* * If we find a matching record, check whether an update would * conflict. 
Do this before checking if the update is visible - * in __cursor_valid, or we can miss conflict. + * in __wt_cursor_valid, or we can miss conflict. */ WT_ERR(__curfile_update_check(cbt)); /* Remove the record if it exists. */ - if (cbt->compare != 0 || !__cursor_valid(cbt, NULL)) { + if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) { if (!__cursor_fix_implicit(btree, cbt)) WT_ERR(WT_NOTFOUND); /* @@ -711,7 +711,7 @@ retry: WT_RET(__cursor_func_init(cbt, true)); /* Check whether an update would conflict. */ WT_ERR(__curfile_update_check(cbt)); - if (cbt->compare != 0 || !__cursor_valid(cbt, NULL)) + if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) WT_ERR(WT_NOTFOUND); ret = __cursor_row_modify(session, cbt, true); @@ -786,7 +786,8 @@ retry: WT_RET(__cursor_func_init(cbt, true)); */ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { WT_ERR(__curfile_update_check(cbt)); - if ((cbt->compare != 0 || !__cursor_valid(cbt, NULL)) && + if ((cbt->compare != 0 || + !__wt_cursor_valid(cbt, NULL)) && !__cursor_fix_implicit(btree, cbt)) WT_ERR(WT_NOTFOUND); } @@ -800,7 +801,7 @@ retry: WT_RET(__cursor_func_init(cbt, true)); */ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { WT_ERR(__curfile_update_check(cbt)); - if (cbt->compare != 0 || !__cursor_valid(cbt, NULL)) + if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) WT_ERR(WT_NOTFOUND); } ret = __cursor_row_modify(session, cbt, false); @@ -829,155 +830,6 @@ err: if (ret == WT_RESTART) { return (ret); } -/* - * __wt_btcur_next_random -- - * Move to a random record in the tree. There are two algorithms, one - * where we select a record at random from the whole tree on each - * retrieval and one where we first select a record at random from the - * whole tree, and then subsequently sample forward from that location. - * The sampling approach allows us to select reasonably uniform random - * points from unbalanced trees. 
- */ -int -__wt_btcur_next_random(WT_CURSOR_BTREE *cbt) -{ - WT_BTREE *btree; - WT_DECL_RET; - WT_SESSION_IMPL *session; - WT_UPDATE *upd; - wt_off_t size; - uint64_t n, skip; - - session = (WT_SESSION_IMPL *)cbt->iface.session; - btree = cbt->btree; - - /* - * Only supports row-store: applications can trivially select a random - * value from a column-store, if there were any reason to do so. - */ - if (btree->type != BTREE_ROW) - WT_RET_MSG(session, ENOTSUP, - "WT_CURSOR.next_random only supported by row-store tables"); - - WT_STAT_CONN_INCR(session, cursor_next); - WT_STAT_DATA_INCR(session, cursor_next); - -#ifdef HAVE_DIAGNOSTIC - /* - * Under some conditions we end up using the underlying cursor.next to - * walk through the object. Since there are multiple calls, we can hit - * the cursor-order checks, turn them off. - */ - __wt_cursor_key_order_reset(cbt); -#endif - - /* - * If we don't have a current position in the tree, or if retrieving - * random values without sampling, pick a roughly random leaf page in - * the tree and return an entry from it. - */ - if (cbt->ref == NULL || cbt->next_random_sample_size == 0) { - WT_ERR(__cursor_func_init(cbt, true)); - WT_WITH_PAGE_INDEX( - session, ret = __wt_row_random_descent(session, cbt)); - if (ret == 0) - goto random_page_entry; - - /* - * Random descent may return not-found: the tree might be empty - * or have so many deleted items we didn't find any valid pages. - * We can't return WT_NOTFOUND to the application unless a tree - * is really empty, fallback to skipping through tree pages. - */ - WT_ERR_NOTFOUND_OK(ret); - } - - /* - * Cursor through the tree, skipping past the sample size of the leaf - * pages in the tree between each random key return to compensate for - * unbalanced trees. - * - * If the random descent attempt failed, we don't have a configured - * sample size, use 100 for no particular reason. 
- */ - if (cbt->next_random_sample_size == 0) - cbt->next_random_sample_size = 100; - - /* - * If the random descent attempt failed, or it's our first skip attempt, - * we haven't yet set the pages to skip, do it now. - * - * Use the underlying file size divided by its block allocation size as - * our guess of leaf pages in the file (this can be entirely wrong, as - * it depends on how many pages are in this particular checkpoint, how - * large the leaf and internal pages really are, and other factors). - * Then, divide that value by the configured sample size and increment - * the final result to make sure tiny files don't leave us with a skip - * value of 0. - * - * !!! - * Ideally, the number would be prime to avoid restart issues. - */ - if (cbt->next_random_leaf_skip == 0) { - WT_ERR(btree->bm->size(btree->bm, session, &size)); - cbt->next_random_leaf_skip = (uint64_t) - ((size / btree->allocsize) / - cbt->next_random_sample_size) + 1; - } - - /* - * Be paranoid about loop termination: first, if the last leaf page - * skipped was also the last leaf page in the tree, skip may be set to - * zero on return along with the NULL WT_REF end-of-walk condition. - * Second, if a tree has no valid pages at all (the condition after - * initial creation), we might make no progress at all, or finally, if - * a tree has only deleted pages, we'll make progress, but never get a - * useful WT_REF. And, of course, the tree can switch from one of these - * states to another without warning. Decrement skip regardless of what - * is happening in the search, guarantee we eventually quit. - * - * Pages read for data sampling aren't "useful"; don't update the read - * generation of pages already in memory, and if a page is read, set - * its generation to a low value so it is evicted quickly. 
- */ - for (skip = cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;) { - n = skip; - WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip, - WT_READ_NO_GEN | WT_READ_SKIP_INTL | WT_READ_WONT_NEED)); - if (n == skip) { - if (skip == 0) - break; - --skip; - } - } - - /* - * We can't return WT_NOTFOUND to the application unless a tree is - * really empty, fallback to a random entry from the first page in the - * tree that has anything at all. - */ - if (cbt->ref == NULL) - WT_ERR(__wt_btcur_next(cbt, false)); - -random_page_entry: - /* - * Select a random entry from the leaf page. If it's not valid, move to - * the next entry, if that doesn't work, move to the previous entry. - */ - WT_ERR(__wt_row_random_leaf(session, cbt)); - if (__cursor_valid(cbt, &upd)) - WT_ERR(__wt_kv_return(session, cbt, upd)); - else { - if ((ret = __wt_btcur_next(cbt, false)) == WT_NOTFOUND) - ret = __wt_btcur_prev(cbt, false); - WT_ERR(ret); - } - return (0); - -err: WT_TRET(__cursor_reset(cbt)); - return (ret); -} - /* * __wt_btcur_compare -- * Return a comparison between two cursors. diff --git a/src/btree/bt_random.c b/src/btree/bt_random.c new file mode 100644 index 00000000000..3cc6838c4c8 --- /dev/null +++ b/src/btree/bt_random.c @@ -0,0 +1,413 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_row_random_leaf -- + * Return a random key from a row-store leaf page. 
+ */ +int +__wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) +{ + WT_INSERT *ins, **start, **stop; + WT_INSERT_HEAD *ins_head; + WT_PAGE *page; + uint64_t samples; + uint32_t choice, entries, i; + int level; + + page = cbt->ref->page; + start = stop = NULL; /* [-Wconditional-uninitialized] */ + entries = 0; /* [-Wconditional-uninitialized] */ + + __cursor_pos_clear(cbt); + + /* If the page has disk-based entries, select from them. */ + if (page->entries != 0) { + cbt->compare = 0; + cbt->slot = __wt_random(&session->rnd) % page->entries; + + /* + * The real row-store search function builds the key, so we + * have to as well. + */ + return (__wt_row_leaf_key(session, + page, page->pg_row + cbt->slot, cbt->tmp, false)); + } + + /* + * If the tree is new (and not empty), it might have a large insert + * list. + * + * Walk down the list until we find a level with at least 50 entries, + * that's where we'll start rolling random numbers. The value 50 is + * used to ignore levels with only a few entries, that is, levels which + * are potentially badly skewed. + */ + F_SET(cbt, WT_CBT_SEARCH_SMALLEST); + if ((ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL) + return (WT_NOTFOUND); + for (level = WT_SKIP_MAXDEPTH - 1; level >= 0; --level) { + start = &ins_head->head[level]; + for (entries = 0, stop = start; + *stop != NULL; stop = &(*stop)->next[level]) + ++entries; + + if (entries > 50) + break; + } + + /* + * If it's a tiny list and we went all the way to level 0, correct the + * level; entries is correctly set. + */ + if (level < 0) + level = 0; + + /* + * Step down the skip list levels, selecting a random chunk of the name + * space at each level. + */ + for (samples = entries; level > 0; samples += entries) { + /* + * There are (entries) or (entries + 1) chunks of the name space + * considered at each level. 
They are: between start and the 1st + * element, between the 1st and 2nd elements, and so on to the + * last chunk which is the name space after the stop element on + * the current level. This last chunk of name space may or may + * not be there: as we descend the levels of the skip list, this + * chunk may appear, depending if the next level down has + * entries logically after the stop point in the current level. + * We can't ignore those entries: because of the algorithm used + * to determine the depth of a skiplist, there may be a large + * number of entries "revealed" by descending a level. + * + * If the next level down has more items after the current stop + * point, there are (entries + 1) chunks to consider, else there + * are (entries) chunks. + */ + if (*(stop - 1) == NULL) + choice = __wt_random(&session->rnd) % entries; + else + choice = __wt_random(&session->rnd) % (entries + 1); + + if (choice == entries) { + /* + * We selected the name space after the stop element on + * this level. Set the start point to the current stop + * point, descend a level and move the stop element to + * the end of the list, that is, the end of the newly + * discovered name space, counting entries as we go. + */ + start = stop; + --start; + --level; + for (entries = 0, stop = start; + *stop != NULL; stop = &(*stop)->next[level]) + ++entries; + } else { + /* + * We selected another name space on the level. Move the + * start pointer the selected number of entries forward + * to the start of the selected chunk (if the selected + * number is 0, start won't move). Set the stop pointer + * to the next element in the list and drop both start + * and stop down a level. + */ + for (i = 0; i < choice; ++i) + start = &(*start)->next[level]; + stop = &(*start)->next[level]; + + --start; + --stop; + --level; + + /* Count the entries in the selected name space. 
*/ + for (entries = 0, + ins = *start; ins != *stop; ins = ins->next[level]) + ++entries; + } + } + + /* + * When we reach the bottom level, entries will already be set. Select + * a random entry from the name space and return it. + * + * It should be impossible for the entries count to be 0 at this point, + * but check for it out of paranoia and to quiet static testing tools. + */ + if (entries > 0) + entries = __wt_random(&session->rnd) % entries; + for (ins = *start; entries > 0; --entries) + ins = ins->next[0]; + + cbt->ins = ins; + cbt->ins_head = ins_head; + cbt->compare = 0; + + /* + * Random lookups in newly created collections can be slow if a page + * consists of a large skiplist. Schedule the page for eviction if we + * encounter a large skiplist. This worthwhile because applications + * that take a sample often take many samples, so the overhead of + * traversing the skip list each time accumulates to real time. + */ + if (samples > 5000) + __wt_page_evict_soon(session, cbt->ref); + + return (0); +} + +/* + * __wt_random_descent -- + * Find a random leaf page in a tree. + */ +int +__wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, bool eviction) +{ + WT_BTREE *btree; + WT_DECL_RET; + WT_PAGE *page; + WT_PAGE_INDEX *pindex; + WT_REF *current, *descent; + uint32_t flags, i, entries, retry; + + btree = S2BT(session); + current = NULL; + retry = 100; + + /* Eviction should not be tapped to do eviction. */ + flags = WT_READ_RESTART_OK; + if (eviction) + LF_SET(WT_READ_NO_EVICT); + + if (0) { +restart: /* + * Discard the currently held page and restart the search from + * the root. + */ + WT_RET(__wt_page_release(session, current, flags)); + } + + /* Search the internal pages of the tree. */ + current = &btree->root; + for (;;) { + page = current->page; + if (!WT_PAGE_IS_INTERNAL(page)) + break; + + WT_INTL_INDEX_GET(session, page, pindex); + entries = pindex->entries; + + /* + * There may be empty pages in the tree, and they're useless to + * us. 
If we don't find a non-empty page in "entries" random + * guesses, take the first non-empty page in the tree. If the + * search page contains nothing other than empty pages, restart + * from the root some number of times before giving up. + * + * Eviction is only looking for a place in the cache and so only + * wants in-memory pages (but a deleted page is fine); currently + * our other caller is looking for a key/value pair on a random + * leave page, and so will accept any page that contains a valid + * key/value pair, so on-disk is fine, but deleted is not. + */ + descent = NULL; + for (i = 0; i < entries; ++i) { + descent = + pindex->index[__wt_random(&session->rnd) % entries]; + if (descent->state == WT_REF_MEM || + (!eviction && descent->state == WT_REF_DISK)) + break; + } + if (i == entries) + for (i = 0; i < entries; ++i) { + descent = pindex->index[i]; + if (descent->state == WT_REF_MEM || + (!eviction && + descent->state == WT_REF_DISK)) + break; + } + if (i == entries || descent == NULL) { + if (--retry > 0) + goto restart; + + WT_RET(__wt_page_release(session, current, flags)); + return (WT_NOTFOUND); + } + + /* + * Swap the current page for the child page. If the page splits + * while we're retrieving it, restart the search at the root. + * + * On other error, simply return, the swap call ensures we're + * holding nothing on failure. + */ + if ((ret = + __wt_page_swap(session, current, descent, flags)) == 0) { + current = descent; + continue; + } + if (ret == WT_RESTART) + goto restart; + return (ret); + } + + *refp = current; + return (0); +} + +/* + * __wt_btcur_next_random -- + * Move to a random record in the tree. There are two algorithms, one + * where we select a record at random from the whole tree on each + * retrieval and one where we first select a record at random from the + * whole tree, and then subsequently sample forward from that location. 
+ * The sampling approach allows us to select reasonably uniform random + * points from unbalanced trees. + */ +int +__wt_btcur_next_random(WT_CURSOR_BTREE *cbt) +{ + WT_BTREE *btree; + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_UPDATE *upd; + wt_off_t size; + uint64_t n, skip; + + session = (WT_SESSION_IMPL *)cbt->iface.session; + btree = cbt->btree; + + /* + * Only supports row-store: applications can trivially select a random + * value from a column-store, if there were any reason to do so. + */ + if (btree->type != BTREE_ROW) + WT_RET_MSG(session, ENOTSUP, + "WT_CURSOR.next_random only supported by row-store tables"); + + WT_STAT_CONN_INCR(session, cursor_next); + WT_STAT_DATA_INCR(session, cursor_next); + +#ifdef HAVE_DIAGNOSTIC + /* + * Under some conditions we end up using the underlying cursor.next to + * walk through the object. Since there are multiple calls, we can hit + * the cursor-order checks, turn them off. + */ + __wt_cursor_key_order_reset(cbt); +#endif + + /* + * If we don't have a current position in the tree, or if retrieving + * random values without sampling, pick a roughly random leaf page in + * the tree and return an entry from it. + */ + if (cbt->ref == NULL || cbt->next_random_sample_size == 0) { + WT_ERR(__cursor_func_init(cbt, true)); + WT_WITH_PAGE_INDEX(session, + ret = __wt_random_descent(session, &cbt->ref, false)); + if (ret == 0) + goto random_page_entry; + + /* + * Random descent may return not-found: the tree might be empty + * or have so many deleted items we didn't find any valid pages. + * We can't return WT_NOTFOUND to the application unless a tree + * is really empty, fallback to skipping through tree pages. + */ + WT_ERR_NOTFOUND_OK(ret); + } + + /* + * Cursor through the tree, skipping past the sample size of the leaf + * pages in the tree between each random key return to compensate for + * unbalanced trees. 
+ * + * If the random descent attempt failed, we don't have a configured + * sample size, use 100 for no particular reason. + */ + if (cbt->next_random_sample_size == 0) + cbt->next_random_sample_size = 100; + + /* + * If the random descent attempt failed, or it's our first skip attempt, + * we haven't yet set the pages to skip, do it now. + * + * Use the underlying file size divided by its block allocation size as + * our guess of leaf pages in the file (this can be entirely wrong, as + * it depends on how many pages are in this particular checkpoint, how + * large the leaf and internal pages really are, and other factors). + * Then, divide that value by the configured sample size and increment + * the final result to make sure tiny files don't leave us with a skip + * value of 0. + * + * !!! + * Ideally, the number would be prime to avoid restart issues. + */ + if (cbt->next_random_leaf_skip == 0) { + WT_ERR(btree->bm->size(btree->bm, session, &size)); + cbt->next_random_leaf_skip = (uint64_t) + ((size / btree->allocsize) / + cbt->next_random_sample_size) + 1; + } + + /* + * Be paranoid about loop termination: first, if the last leaf page + * skipped was also the last leaf page in the tree, skip may be set to + * zero on return along with the NULL WT_REF end-of-walk condition. + * Second, if a tree has no valid pages at all (the condition after + * initial creation), we might make no progress at all, or finally, if + * a tree has only deleted pages, we'll make progress, but never get a + * useful WT_REF. And, of course, the tree can switch from one of these + * states to another without warning. Decrement skip regardless of what + * is happening in the search, guarantee we eventually quit. + * + * Pages read for data sampling aren't "useful"; don't update the read + * generation of pages already in memory, and if a page is read, set + * its generation to a low value so it is evicted quickly. 
+ */ + for (skip = cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;) { + n = skip; + WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip, + WT_READ_NO_GEN | WT_READ_SKIP_INTL | WT_READ_WONT_NEED)); + if (n == skip) { + if (skip == 0) + break; + --skip; + } + } + + /* + * We can't return WT_NOTFOUND to the application unless a tree is + * really empty, fallback to a random entry from the first page in the + * tree that has anything at all. + */ + if (cbt->ref == NULL) + WT_ERR(__wt_btcur_next(cbt, false)); + +random_page_entry: + /* + * Select a random entry from the leaf page. If it's not valid, move to + * the next entry, if that doesn't work, move to the previous entry. + */ + WT_ERR(__wt_row_random_leaf(session, cbt)); + if (__wt_cursor_valid(cbt, &upd)) + WT_ERR(__wt_kv_return(session, cbt, upd)); + else { + if ((ret = __wt_btcur_next(cbt, false)) == WT_NOTFOUND) + ret = __wt_btcur_prev(cbt, false); + WT_ERR(ret); + } + return (0); + +err: WT_TRET(__cursor_reset(cbt)); + return (ret); +} diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c index 0858e42356b..9c3d467340e 100644 --- a/src/btree/row_srch.c +++ b/src/btree/row_srch.c @@ -623,240 +623,3 @@ leaf_match: cbt->compare = 0; err: WT_TRET(__wt_page_release(session, current, 0)); return (ret); } - -/* - * __wt_row_random_leaf -- - * Return a random key from a row-store leaf page. - */ -int -__wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) -{ - WT_INSERT *ins, **start, **stop; - WT_INSERT_HEAD *ins_head; - WT_PAGE *page; - uint64_t samples; - uint32_t choice, entries, i; - int level; - - page = cbt->ref->page; - start = stop = NULL; /* [-Wconditional-uninitialized] */ - entries = 0; /* [-Wconditional-uninitialized] */ - - __cursor_pos_clear(cbt); - - /* If the page has disk-based entries, select from them. 
*/ - if (page->entries != 0) { - cbt->compare = 0; - cbt->slot = __wt_random(&session->rnd) % page->entries; - - /* - * The real row-store search function builds the key, so we - * have to as well. - */ - return (__wt_row_leaf_key(session, - page, page->pg_row + cbt->slot, cbt->tmp, false)); - } - - /* - * If the tree is new (and not empty), it might have a large insert - * list. - * - * Walk down the list until we find a level with at least 50 entries, - * that's where we'll start rolling random numbers. The value 50 is - * used to ignore levels with only a few entries, that is, levels which - * are potentially badly skewed. - */ - F_SET(cbt, WT_CBT_SEARCH_SMALLEST); - if ((ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL) - return (WT_NOTFOUND); - for (level = WT_SKIP_MAXDEPTH - 1; level >= 0; --level) { - start = &ins_head->head[level]; - for (entries = 0, stop = start; - *stop != NULL; stop = &(*stop)->next[level]) - ++entries; - - if (entries > 50) - break; - } - - /* - * If it's a tiny list and we went all the way to level 0, correct the - * level; entries is correctly set. - */ - if (level < 0) - level = 0; - - /* - * Step down the skip list levels, selecting a random chunk of the name - * space at each level. - */ - for (samples = entries; level > 0; samples += entries) { - /* - * There are (entries) or (entries + 1) chunks of the name space - * considered at each level. They are: between start and the 1st - * element, between the 1st and 2nd elements, and so on to the - * last chunk which is the name space after the stop element on - * the current level. This last chunk of name space may or may - * not be there: as we descend the levels of the skip list, this - * chunk may appear, depending if the next level down has - * entries logically after the stop point in the current level. 
- * We can't ignore those entries: because of the algorithm used - * to determine the depth of a skiplist, there may be a large - * number of entries "revealed" by descending a level. - * - * If the next level down has more items after the current stop - * point, there are (entries + 1) chunks to consider, else there - * are (entries) chunks. - */ - if (*(stop - 1) == NULL) - choice = __wt_random(&session->rnd) % entries; - else - choice = __wt_random(&session->rnd) % (entries + 1); - - if (choice == entries) { - /* - * We selected the name space after the stop element on - * this level. Set the start point to the current stop - * point, descend a level and move the stop element to - * the end of the list, that is, the end of the newly - * discovered name space, counting entries as we go. - */ - start = stop; - --start; - --level; - for (entries = 0, stop = start; - *stop != NULL; stop = &(*stop)->next[level]) - ++entries; - } else { - /* - * We selected another name space on the level. Move the - * start pointer the selected number of entries forward - * to the start of the selected chunk (if the selected - * number is 0, start won't move). Set the stop pointer - * to the next element in the list and drop both start - * and stop down a level. - */ - for (i = 0; i < choice; ++i) - start = &(*start)->next[level]; - stop = &(*start)->next[level]; - - --start; - --stop; - --level; - - /* Count the entries in the selected name space. */ - for (entries = 0, - ins = *start; ins != *stop; ins = ins->next[level]) - ++entries; - } - } - - /* - * When we reach the bottom level, entries will already be set. Select - * a random entry from the name space and return it. - * - * It should be impossible for the entries count to be 0 at this point, - * but check for it out of paranoia and to quiet static testing tools. 
- */ - if (entries > 0) - entries = __wt_random(&session->rnd) % entries; - for (ins = *start; entries > 0; --entries) - ins = ins->next[0]; - - cbt->ins = ins; - cbt->ins_head = ins_head; - cbt->compare = 0; - - /* - * Random lookups in newly created collections can be slow if a page - * consists of a large skiplist. Schedule the page for eviction if we - * encounter a large skiplist. This worthwhile because applications - * that take a sample often take many samples, so the overhead of - * traversing the skip list each time accumulates to real time. - */ - if (samples > 5000) - __wt_page_evict_soon(session, cbt->ref); - - return (0); -} - -/* - * __wt_row_random_descent -- - * Find a random leaf page in a row-store tree. - */ -int -__wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) -{ - WT_BTREE *btree; - WT_DECL_RET; - WT_PAGE *page; - WT_PAGE_INDEX *pindex; - WT_REF *current, *descent; - uint32_t i, entries, retry; - - btree = S2BT(session); - current = NULL; - retry = 100; - -restart: - /* Search the internal pages of the tree. */ - current = &btree->root; - for (;;) { - page = current->page; - if (page->type != WT_PAGE_ROW_INT) - break; - - WT_INTL_INDEX_GET(session, page, pindex); - entries = pindex->entries; - - /* - * There may be empty pages in the tree, and they're useless to - * us. If we don't find a non-empty page in "entries" random - * guesses, take the first non-empty page in the tree. If the - * search page contains nothing other than empty pages, restart - * from the root some number of times before giving up. 
- */ - descent = NULL; - for (i = 0; i < entries; ++i) { - descent = - pindex->index[__wt_random(&session->rnd) % entries]; - if (descent->state != WT_REF_DELETED) - break; - } - if (i == entries) - for (i = 0; i < entries; ++i) { - descent = pindex->index[i]; - if (descent->state != WT_REF_DELETED) - break; - } - if (i == entries || descent == NULL) { - /* - * Discard the currently held page and restart from the - * root. - */ - WT_RET(__wt_page_release(session, current, 0)); - if (--retry > 0) - goto restart; - return (WT_NOTFOUND); - } - - /* - * Swap the current page for the child page. If the page splits - * while we're retrieving it, restart the search at the root. - * - * On other error, simply return, the swap call ensures we're - * holding nothing on failure. - */ - if ((ret = __wt_page_swap( - session, current, descent, WT_READ_RESTART_OK)) == 0) { - current = descent; - continue; - } - if (ret == WT_RESTART) - goto restart; - return (ret); - } - - cbt->ref = current; - return (0); -} diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index efe056aee02..42fe4d4608e 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1654,10 +1654,29 @@ __evict_walk_file(WT_SESSION_IMPL *session, !F_ISSET(cache, WT_CACHE_EVICT_CLEAN)) min_pages *= 10; + /* + * Choose a random point in the tree if looking for candidates in a + * tree with no starting point set. This is mostly aimed at ensuring + * eviction fairly visits all pages in trees with a lot of in-cache + * content. + */ + if (btree->evict_ref == NULL) { + /* Ensure internal pages indexes remain valid for our walk */ + WT_WITH_PAGE_INDEX(session, ret = + __wt_random_descent(session, &btree->evict_ref, true)); + WT_RET_NOTFOUND_OK(ret); + + /* + * Reverse the direction of the walk each time we start at a + * random point so both ends of the tree are equally likely to + * be visited. 
+ */ + btree->evict_walk_reverse = !btree->evict_walk_reverse; + } + walk_flags = WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT; - /* Randomize the walk direction. */ if (btree->evict_walk_reverse) FLD_SET(walk_flags, WT_READ_PREV); @@ -1799,13 +1818,6 @@ fast: /* If the page can't be evicted, give up. */ WT_STAT_CONN_INCRV( session, cache_eviction_pages_queued, (u_int)(evict - start)); - /* - * If gave up the walk, reverse the direction of the walk and skip it - * next time. - */ - if (give_up) - btree->evict_walk_reverse = !btree->evict_walk_reverse; - /* * If we couldn't find the number of pages we were looking for, skip * the tree next time. diff --git a/src/include/extern.h b/src/include/extern.h index 836a7cb1ae6..8e55077c2a9 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -98,6 +98,7 @@ extern void __wt_cursor_key_order_reset(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_A extern void __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern bool __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -105,7 +106,6 @@ extern int 
__wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((w extern int __wt_btcur_update_check(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_remove(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -150,6 +150,9 @@ extern int __wt_ovfl_cache(WT_SESSION_IMPL *session, WT_PAGE *page, void *cookie extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, size_t memsize, uint32_t flags, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern 
int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, bool eviction) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_las_remove_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags @@ -193,8 +196,6 @@ extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE extern void __wt_update_obsolete_free( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_las_stats_update(WT_SESSION_IMPL *session) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_las_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_las_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -- cgit v1.2.1 From 92c48cfcd9c66ba66386fd48ca326ec750057d86 Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Tue, 14 Feb 2017 00:56:29 -0500 Subject: WT-2909 Add a custom file system fault injection test to verify checkpoint integrity (#3272) Implement a custom file system, and use it via a test case to add validate checkpoint integrity in the face of file-system level errors. --- dist/s_string.ok | 2 + dist/s_void | 1 + ext/test/fail_fs/fail_fs.c | 197 ++++++-- test/csuite/Makefile.am | 3 + test/csuite/wt2909_checkpoint_integrity/main.c | 660 +++++++++++++++++++++++++ test/utility/misc.c | 2 +- test/utility/test_util.h | 2 +- 7 files changed, 827 insertions(+), 40 deletions(-) create mode 100644 test/csuite/wt2909_checkpoint_integrity/main.c diff --git a/dist/s_string.ok b/dist/s_string.ok index d2e9dffaa48..e033f77327f 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -770,6 +770,7 @@ idx ifdef ifdef's iiSii +iiiS iiii iiu ikey @@ -1138,6 +1139,7 @@ subgetraw subgets subinit sublicense +subtest subtree sunique superset diff --git a/dist/s_void b/dist/s_void index 947153e730b..90425d5a718 100755 --- a/dist/s_void +++ b/dist/s_void @@ -82,6 +82,7 @@ func_ok() -e '/int fail_file_sync$/d' \ -e '/int fail_fs_directory_list_free$/d' \ -e '/int fail_fs_exist$/d' \ + -e '/int fail_fs_simulate_fail$/d' \ -e '/int fail_fs_terminate$/d' \ -e '/int handle_message$/d' \ -e '/int handle_progress$/d' \ diff --git a/ext/test/fail_fs/fail_fs.c b/ext/test/fail_fs/fail_fs.c index 29d469768c5..a6376ce203b 100644 --- a/ext/test/fail_fs/fail_fs.c +++ b/ext/test/fail_fs/fail_fs.c @@ -35,16 +35,29 @@ #include #include 
#include +#include #include #include "queue.h" -#define FAIL_FS_GIGABYTE (1024 * 1024 * 1024) +#define FAIL_FS_GIGABYTE (1024 * 1024 * 1024) + +#define FAIL_FS_ENV_ENABLE "WT_FAIL_FS_ENABLE" +#define FAIL_FS_ENV_WRITE_ALLOW "WT_FAIL_FS_WRITE_ALLOW" +#define FAIL_FS_ENV_READ_ALLOW "WT_FAIL_FS_READ_ALLOW" /* * A "fail file system", that is, a file system extension that fails when we - * want it to. This is only used in test frameworks, this fact allows us - * to simplify some error paths. + * want it to. This is only used in test frameworks, this fact allows us to + * simplify some error paths. This code is not portable to Windows, as it has + * direct knowledge of file descriptors, environment variables and stack + * traces. + * + * When the filesystem extension is configured, parameters can set how many + * reads or writes can be allowed before failure. If this is not fine-grained + * enough, an 'environment' configuration parameter can be specified. If that + * is used, then on every file system read or write, environment variables are + * checked that control when reading or writing should fail. */ typedef struct { WT_FILE_SYSTEM iface; @@ -54,6 +67,9 @@ typedef struct { * uses a single, global file system lock. 
*/ pthread_rwlock_t lock; /* Lock */ + bool fail_enabled; + bool use_environment; + bool verbose; int64_t read_ops; int64_t write_ops; int64_t allow_reads; @@ -86,12 +102,12 @@ static int fail_file_truncate(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t); static int fail_file_write( WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, const void *); static bool fail_fs_arg( - const char *match, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value, - int64_t *argp); + const char *, WT_CONFIG_ITEM *, WT_CONFIG_ITEM *, int64_t *); static int fail_fs_directory_list(WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, char ***, uint32_t *); static int fail_fs_directory_list_free( WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t); +static void fail_fs_env(const char *, int64_t *); static int fail_fs_exist(WT_FILE_SYSTEM *, WT_SESSION *, const char *, bool *); static int fail_fs_open(WT_FILE_SYSTEM *, WT_SESSION *, const char *, WT_FS_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **); @@ -99,6 +115,8 @@ static int fail_fs_remove( WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t); static int fail_fs_rename( WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t); +static int fail_fs_simulate_fail( + FAIL_FILE_HANDLE *, WT_SESSION *, int64_t, const char *); static int fail_fs_size( WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *); static int fail_fs_terminate(WT_FILE_SYSTEM *, WT_SESSION *); @@ -145,8 +163,12 @@ fail_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *session) fail_fh = (FAIL_FILE_HANDLE *)file_handle; + /* + * We don't actually open an fd when opening directories for flushing, + * so ignore that case here. 
+ */ if (fail_fh->fd < 0) - return (EINVAL); + return (0); ret = close(fail_fh->fd); fail_fh->fd = -1; fail_file_handle_remove(session, fail_fh); @@ -198,7 +220,7 @@ fail_file_read(WT_FILE_HANDLE *file_handle, FAIL_FILE_HANDLE *fail_fh; FAIL_FILE_SYSTEM *fail_fs; WT_EXTENSION_API *wtext; - int64_t read_ops; + int64_t envint, read_ops; int ret; size_t chunk; ssize_t nr; @@ -207,19 +229,34 @@ fail_file_read(WT_FILE_HANDLE *file_handle, fail_fh = (FAIL_FILE_HANDLE *)file_handle; fail_fs = fail_fh->fail_fs; wtext = fail_fs->wtext; + read_ops = 0; ret = 0; fail_fs_lock(&fail_fs->lock); - read_ops = ++fail_fs->read_ops; + + if (fail_fs->use_environment) { + fail_fs_env(FAIL_FS_ENV_ENABLE, &envint); + if (envint != 0) { + if (!fail_fs->fail_enabled) { + fail_fs->fail_enabled = true; + fail_fs_env(FAIL_FS_ENV_READ_ALLOW, + &fail_fs->allow_reads); + fail_fs->read_ops = 0; + } + read_ops = ++fail_fs->read_ops; + } else + fail_fs->fail_enabled = false; + } else + read_ops = ++fail_fs->read_ops; + fail_fs_unlock(&fail_fs->lock); - if (fail_fs->allow_reads != 0 && read_ops % fail_fs->allow_reads == 0) { - (void)wtext->msg_printf(wtext, session, - "fail_fs: %s: simulated failure after %" PRId64 - " reads\n", fail_fh->iface.name, read_ops); - return (EIO); - } + if (fail_fs->fail_enabled && fail_fs->allow_reads != 0 && + read_ops % fail_fs->allow_reads == 0) + return (fail_fs_simulate_fail( + fail_fh, session, read_ops, "read")); + /* Break reads larger than 1GB into 1GB chunks. */ for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { chunk = (len < FAIL_FS_GIGABYTE) ? len : FAIL_FS_GIGABYTE; if ((nr = pread(fail_fh->fd, addr, chunk, offset)) <= 0) { @@ -262,7 +299,7 @@ fail_file_size( /* * fail_file_sync -- * Ensure the content of the file is stable. This is a no-op in our - * memory backed file system. + * file system. 
*/ static int fail_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *session) @@ -300,7 +337,7 @@ fail_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *session, FAIL_FILE_HANDLE *fail_fh; FAIL_FILE_SYSTEM *fail_fs; WT_EXTENSION_API *wtext; - int64_t write_ops; + int64_t envint, write_ops; int ret; size_t chunk; ssize_t nr; @@ -309,19 +346,32 @@ fail_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *session, fail_fh = (FAIL_FILE_HANDLE *)file_handle; fail_fs = fail_fh->fail_fs; wtext = fail_fs->wtext; + write_ops = 0; ret = 0; fail_fs_lock(&fail_fs->lock); - write_ops = ++fail_fs->write_ops; + + if (fail_fs->use_environment) { + fail_fs_env(FAIL_FS_ENV_ENABLE, &envint); + if (envint != 0) { + if (!fail_fs->fail_enabled) { + fail_fs->fail_enabled = true; + fail_fs_env(FAIL_FS_ENV_WRITE_ALLOW, + &fail_fs->allow_writes); + fail_fs->write_ops = 0; + } + write_ops = ++fail_fs->write_ops; + } else + fail_fs->fail_enabled = false; + } else + write_ops = ++fail_fs->write_ops; + fail_fs_unlock(&fail_fs->lock); - if (fail_fs->allow_writes != 0 && - write_ops % fail_fs->allow_writes == 0) { - (void)wtext->msg_printf(wtext, session, - "fail_fs: %s: simulated failure after %" PRId64 - " writes\n", fail_fh->iface.name, write_ops); - return (EIO); - } + if (fail_fs->fail_enabled && fail_fs->allow_writes != 0 && + write_ops % fail_fs->allow_writes == 0) + return (fail_fs_simulate_fail( + fail_fh, session, write_ops, "write")); /* Break writes larger than 1GB into 1GB chunks. 
*/ for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { @@ -348,17 +398,12 @@ static bool fail_fs_arg(const char *match, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value, int64_t *argp) { - char *s; - int64_t result; - if (strncmp(match, key->str, key->len) == 0 && - match[key->len] == '\0') { - s = (char *)value->str; - result = strtoll(s, &s, 10); - if ((size_t)(s - (char *)value->str) == value->len) { - *argp = result; - return (true); - } + match[key->len] == '\0' && + (value->type == WT_CONFIG_ITEM_BOOL || + value->type == WT_CONFIG_ITEM_NUM)) { + *argp = value->val; + return (true); } return (false); } @@ -453,6 +498,30 @@ fail_fs_directory_list_free(WT_FILE_SYSTEM *file_system, return (0); } +/* + * fail_fs_env -- + * If the name is in the environment, return its integral value. + */ +static void +fail_fs_env(const char *name, int64_t *valp) +{ + int64_t result; + char *s, *value; + + result = 0; + if ((value = getenv(name)) != NULL) { + s = value; + if (strcmp(value, "true") == 0) + result = 1; + else if (strcmp(value, "false") != 0) { + result = strtoll(value, &s, 10); + if (*s != '\0') + result = 0; + } + } + *valp = result; +} + /* * fail_fs_exist -- * Return if the file exists. @@ -482,7 +551,6 @@ fail_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, WT_FILE_HANDLE *file_handle; int fd, open_flags, ret; - (void)file_type; /* Unused */ (void)session; /* Unused */ *file_handlep = NULL; @@ -492,6 +560,9 @@ fail_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, fd = -1; ret = 0; + if (fail_fs->verbose) + fprintf(stderr, "fail_fs: open: %s\n", name); + fail_fs_lock(&fail_fs->lock); open_flags = 0; @@ -504,7 +575,14 @@ fail_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, else open_flags |= O_RDWR; - if ((fd = open(name, open_flags, 0666)) < 0) { + /* + * Opening a file handle on a directory is only to support filesystems + * that require a directory sync for durability. This is a no-op + * for this file system. 
+ */ + if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY) + fd = -1; + else if ((fd = open(name, open_flags, 0666)) < 0) { ret = errno; goto err; } @@ -587,6 +665,38 @@ fail_fs_rename(WT_FILE_SYSTEM *file_system, return (rename(from, to)); } +/* + * fail_fs_simulate_fail -- + * Simulate a failure from this file system by reporting it + * and returning a non-zero return code. + */ +static int +fail_fs_simulate_fail(FAIL_FILE_HANDLE *fail_fh, WT_SESSION *session, + int64_t nops, const char *opkind) +{ + FAIL_FILE_SYSTEM *fail_fs; + WT_EXTENSION_API *wtext; + int btret, i; + void *bt[100]; + char **btstr; + + fail_fs = fail_fh->fail_fs; + if (fail_fs->verbose) { + wtext = fail_fs->wtext; + (void)wtext->msg_printf(wtext, session, + "fail_fs: %s: simulated failure after %" PRId64 + " %s operations\n", fail_fh->iface.name, nops, opkind); + btret = backtrace(bt, sizeof(bt)/sizeof(bt[0])); + if ((btstr = backtrace_symbols(bt, btret)) != NULL) { + for (i = 0; i < btret; i++) + (void)wtext->msg_printf(wtext, session, " %s", + btstr[i]); + free(btstr); + } + } + return (EIO); +} + /* * fail_fs_size -- * Get the size of a file in bytes, by file name. 
@@ -641,6 +751,7 @@ wiredtiger_extension_init(WT_CONNECTION *conn, WT_CONFIG_ARG *config) WT_CONFIG_PARSER *config_parser; WT_EXTENSION_API *wtext; WT_FILE_SYSTEM *file_system; + int64_t argval; int ret; ret = 0; @@ -663,9 +774,17 @@ wiredtiger_extension_init(WT_CONNECTION *conn, WT_CONFIG_ARG *config) goto err; } while ((ret = config_parser->next(config_parser, &k, &v)) == 0) { - if (fail_fs_arg("allow_writes", &k, &v, &fail_fs->allow_writes)) + if (fail_fs_arg("environment", &k, &v, &argval)) { + fail_fs->use_environment = (argval != 0); + continue; + } else if (fail_fs_arg("verbose", &k, &v, &argval)) { + fail_fs->verbose = (argval != 0); + continue; + } else if (fail_fs_arg("allow_writes", &k, &v, + &fail_fs->allow_writes)) continue; - if (fail_fs_arg("allow_reads", &k, &v, &fail_fs->allow_reads)) + else if (fail_fs_arg("allow_reads", &k, &v, + &fail_fs->allow_reads)) continue; (void)wtext->err_printf(wtext, NULL, @@ -687,6 +806,8 @@ wiredtiger_extension_init(WT_CONNECTION *conn, WT_CONFIG_ARG *config) wtext->strerror(wtext, NULL, ret)); goto err; } + if (fail_fs->allow_writes != 0 || fail_fs->allow_reads != 0) + fail_fs->fail_enabled = true; fail_fs_allocate_lock(&fail_fs->lock); /* Initialize the in-memory jump table. 
*/ diff --git a/test/csuite/Makefile.am b/test/csuite/Makefile.am index 5167b42b433..0158d0c96d1 100644 --- a/test/csuite/Makefile.am +++ b/test/csuite/Makefile.am @@ -37,6 +37,9 @@ noinst_PROGRAMS += test_wt2834_join_bloom_fix test_wt2853_perf_SOURCES = wt2853_perf/main.c noinst_PROGRAMS += test_wt2853_perf +test_wt2909_checkpoint_integrity_SOURCES = wt2909_checkpoint_integrity/main.c +noinst_PROGRAMS += test_wt2909_checkpoint_integrity + test_wt2999_join_extractor_SOURCES = wt2999_join_extractor/main.c noinst_PROGRAMS += test_wt2999_join_extractor diff --git a/test/csuite/wt2909_checkpoint_integrity/main.c b/test/csuite/wt2909_checkpoint_integrity/main.c new file mode 100644 index 00000000000..efc459ff271 --- /dev/null +++ b/test/csuite/wt2909_checkpoint_integrity/main.c @@ -0,0 +1,660 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +#include +#include +#include + +/* + * JIRA ticket reference: WT-2909 + * Test case description: + * + * This test attempts to check the integrity of checkpoints by injecting + * failures (by means of a custom file system) and then trying to recover. To + * insulate the top level program from various crashes that may occur when + * injecting failures, the "populate" code runs in another process, and is + * expected to sometimes fail. Then the top level program runs recovery (with + * the normal file system) and checks the results. Any failure at the top level + * indicates a checkpoint integrity problem. + * + * Each subtest uses the same kind of schema and data, the only variance is + * when the faults are injected. At the moment, this test only injects during + * checkpoints, and only injects write failures. It varies in the number of + * successful writes that occur before an injected failure (during a checkpoint + * operation), this can be indicated with "-o N". When N is not specified, the + * test attempts to find the optimal range of N for testing. Clearly when N is + * large, then the checkpoint may be successfully written, and the data + * represented by the checkpoint will be fully present. When N is small, + * nothing of interest is written and no data is present. To find the sweet + * spot where interesting failures occur, the test does a binary search to find + * the approximate N that divides the "small" and "large" cases. This is not + * strictly deterministic, a given N may give different results on different + * runs. But approximate optimal N can be determined, allowing a series of + * additional tests clustered around this N. 
+ * + * The data is stored in two tables, one having indices. Both tables have + * the same keys and are updated with the same key in a single transaction. + * + * Failure mode: + * If one table is out of step with the other, that is detected as a failure at + * the top level. If an index is missing values (or has extra values), that is + * likewise a failure at the top level. If the tables or the home directory + * cannot be opened, that is a top level error. The tables must be present + * as an initial checkpoint is done without any injected fault. + */ + +/* + * This program does not run on Windows. The non-portable aspects at minimum + * are fork/exec the use of environment variables (used by fail_fs), and file + * name and build locations of dynamically loaded libraries. + */ +#define BIG_SIZE (1024 * 10) +#define BIG_CONTENTS "" +#define MAX_ARGS 20 +#define MAX_OP_RANGE 1000 +#define STDERR_FILE "stderr.txt" +#define STDOUT_FILE "stdout.txt" +#define TESTS_PER_OP_VALUE 3 +#define VERBOSE_PRINT 10000 + +static int check_results(TEST_OPTS *, uint64_t *); +static void check_values(WT_CURSOR *, int, int, int, char *); +static int create_big_string(char **); +static void cursor_count_items(WT_CURSOR *, uint64_t *); +static void disable_failures(void); +static void enable_failures(uint64_t, uint64_t); +static void generate_key(uint32_t, int *); +static void generate_value(uint32_t, uint32_t, char *, int *, int *, int *, + char **); +static void run_check_subtest(TEST_OPTS *, const char *, uint64_t, bool, + uint64_t *); +static void run_check_subtest_range(TEST_OPTS *, const char *, bool); +static int run_process(TEST_OPTS *, const char *, char *[], int *); +static int subtest_main(int, char *[], bool); +static void subtest_populate(TEST_OPTS *, bool); +int main(int, char *[]); + +extern int __wt_optind; + +#define WT_FAIL_FS_LIB "../../ext/test/fail_fs/.libs/libwiredtiger_fail_fs.so" + +/* + * check_results -- + * Check all the tables and verify the results. 
+ */ +static int +check_results(TEST_OPTS *opts, uint64_t *foundp) +{ + WT_CURSOR *maincur, *maincur2, *v0cur, *v1cur, *v2cur; + WT_SESSION *session; + uint64_t count, idxcount, nrecords; + uint32_t rndint; + int key, key_got, ret, v0, v1, v2; + char *bigref, *big; + + testutil_check(create_big_string(&bigref)); + nrecords = opts->nrecords; + testutil_check(wiredtiger_open(opts->home, NULL, + "create,log=(enabled)", &opts->conn)); + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + + testutil_check(session->open_cursor(session, "table:subtest", NULL, + NULL, &maincur)); + testutil_check(session->open_cursor(session, "table:subtest2", NULL, + NULL, &maincur2)); + testutil_check(session->open_cursor(session, "index:subtest:v0", NULL, + NULL, &v0cur)); + testutil_check(session->open_cursor(session, "index:subtest:v1", NULL, + NULL, &v1cur)); + testutil_check(session->open_cursor(session, "index:subtest:v2", NULL, + NULL, &v2cur)); + + count = 0; + while ((ret = maincur->next(maincur)) == 0) { + testutil_check(maincur2->next(maincur2)); + testutil_check(maincur2->get_key(maincur2, &key_got)); + testutil_check(maincur2->get_value(maincur2, &rndint)); + + generate_key((uint32_t)count, &key); + generate_value(rndint, (uint32_t)count, + bigref, &v0, &v1, &v2, &big); + testutil_assert(key == key_got); + + /* Check the key/values in main table. */ + testutil_check(maincur->get_key(maincur, &key_got)); + testutil_assert(key == key_got); + check_values(maincur, v0, v1, v2, big); + + /* Check the values in the indices. 
*/ + v0cur->set_key(v0cur, v0); + testutil_check(v0cur->search(v0cur)); + check_values(v0cur, v0, v1, v2, big); + v1cur->set_key(v1cur, v1); + testutil_check(v1cur->search(v1cur)); + check_values(v1cur, v0, v1, v2, big); + v2cur->set_key(v2cur, v2); + testutil_check(v2cur->search(v2cur)); + check_values(v2cur, v0, v1, v2, big); + + count++; + if (count % VERBOSE_PRINT == 0 && opts->verbose) + printf("checked %" PRIu64 "/%" PRIu64 "\n", count, + nrecords); + } + if (count % VERBOSE_PRINT != 0 && opts->verbose) + printf("checked %" PRIu64 "/%" PRIu64 "\n", count, nrecords); + + /* + * Always expect at least one entry, as populate does a + * checkpoint after the first insert. + */ + testutil_assert(count > 0); + testutil_assert(ret == WT_NOTFOUND); + testutil_assert(maincur2->next(maincur2) == WT_NOTFOUND); + cursor_count_items(v0cur, &idxcount); + testutil_assert(count == idxcount); + cursor_count_items(v1cur, &idxcount); + testutil_assert(count == idxcount); + cursor_count_items(v2cur, &idxcount); + testutil_assert(count == idxcount); + + testutil_check(opts->conn->close(opts->conn, NULL)); + opts->conn = NULL; + + free(bigref); + *foundp = count; + return (0); +} + +/* + * check_values -- + * Check that the values in the cursor match the given values. + */ +static void +check_values(WT_CURSOR *cursor, int v0, int v1, int v2, char *big) +{ + int v0_got, v1_got, v2_got; + char *big_got; + + testutil_check(cursor->get_value(cursor, &v0_got, &v1_got, &v2_got, + &big_got)); + testutil_assert(v0 == v0_got); + testutil_assert(v1 == v1_got); + testutil_assert(v2 == v2_got); + testutil_assert(strcmp(big, big_got) == 0); +} + +/* + * create_big_string -- + * Create and fill the "reference" big array. 
+ */ +static int create_big_string(char **bigp) +{ + size_t i, mod; + char *big; + + if ((big = malloc(BIG_SIZE + 1)) == NULL) + return (ENOMEM); + mod = strlen(BIG_CONTENTS); + for (i = 0; i < BIG_SIZE; i++) { + big[i] = BIG_CONTENTS[i % mod]; + } + big[BIG_SIZE] = '\0'; + *bigp = big; + return (0); +} + +/* + * cursor_count_items -- + * Count the number of items in the table by traversing + * through the cursor. + */ +static void +cursor_count_items(WT_CURSOR *cursor, uint64_t *countp) +{ + int ret; + + *countp = 0; + + cursor->reset(cursor); + while ((ret = cursor->next(cursor)) == 0) + (*countp)++; + testutil_assert(ret == WT_NOTFOUND); +} + +/* + * disable_failures -- + * Disable failures in the fail file system. + */ +static void +disable_failures(void) +{ + setenv("WT_FAIL_FS_ENABLE", "0", 1); +} + +/* + * enable_failures -- + * Enable failures in the fail file system. + */ +static void +enable_failures(uint64_t allow_writes, uint64_t allow_reads) +{ + char value[100]; + + setenv("WT_FAIL_FS_ENABLE", "1", 1); + snprintf(value, sizeof(value), "%" PRIu64, allow_writes); + setenv("WT_FAIL_FS_WRITE_ALLOW", value, 1); + snprintf(value, sizeof(value), "%" PRIu64, allow_reads); + setenv("WT_FAIL_FS_READ_ALLOW", value, 1); +} + +/* + * generate_key -- + * Generate a key used by the "subtest" and "subtest2" tables. + */ +static void +generate_key(uint32_t i, int *keyp) +{ + *keyp = (int)i; +} + +/* + * generate_value -- + * Generate values for the "subtest" table. + */ +static void +generate_value(uint32_t rndint, uint32_t i, char *bigref, + int *v0p, int *v1p, int *v2p, char **bigp) +{ + *v0p = (int)(i * 7); + *v1p = (int)(i * 10007); + *v2p = (int)(i * 100000007); + *bigp = &bigref[rndint % BIG_SIZE]; +} + +/* + * run_check_subtest -- + * Run the subtest with the given parameters and check the results. 
+ */ +static void +run_check_subtest(TEST_OPTS *opts, const char *debugger, uint64_t nops, + bool close_test, uint64_t *nresultsp) +{ + int narg; + int estatus; + char rarg[20], sarg[20]; + char *subtest_args[MAX_ARGS]; + + narg = 0; + if (debugger != NULL) { + subtest_args[narg++] = (char *)debugger; + subtest_args[narg++] = (char *)"--"; + } + + subtest_args[narg++] = (char *)opts->progname; + /* "subtest" must appear before arguments */ + if (close_test) + subtest_args[narg++] = (char *)"subtest_close"; + else + subtest_args[narg++] = (char *)"subtest"; + subtest_args[narg++] = (char *)"-h"; + subtest_args[narg++] = opts->home; + subtest_args[narg++] = (char *)"-v"; /* subtest is always verbose */ + subtest_args[narg++] = (char *)"-p"; + subtest_args[narg++] = (char *)"-o"; + snprintf(sarg, sizeof(sarg), "%" PRIu64, nops); + subtest_args[narg++] = sarg; /* number of operations */ + subtest_args[narg++] = (char *)"-n"; + snprintf(rarg, sizeof(rarg), "%" PRIu64, opts->nrecords); + subtest_args[narg++] = rarg; /* number of records */ + subtest_args[narg++] = NULL; + testutil_assert(narg <= MAX_ARGS); + if (opts->verbose) + printf("running a separate process with %" PRIu64 + " operations until fail...\n", nops); + testutil_clean_work_dir(opts->home); + testutil_check(run_process( + opts, debugger != NULL ? debugger : opts->progname, + subtest_args, &estatus)); + if (opts->verbose) + printf("process exited %d\n", estatus); + + /* + * Verify results in parent process. + */ + testutil_check(check_results(opts, nresultsp)); +} + +/* + * run_check_subtest_range -- + * + * Run successive tests via binary search that determines the approximate + * crossover point between when data is recoverable or not. Once that is + * determined, run the subtest in a range near that crossover point. + * + * The theory is that running at the crossover point will tend to trigger + * "interesting" failures at the borderline when the checkpoint is about to, + * or has, succeeded. 
If any of those failures creates a WT home directory + * that cannot be recovered, the top level test will fail. + */ +static void +run_check_subtest_range(TEST_OPTS *opts, const char *debugger, bool close_test) +{ + uint64_t cutoff, high, low, mid, nops, nresults; + int i; + bool got_failure, got_success; + + if (opts->verbose) + printf("Determining best range of operations until failure, " + "with close_test %s.\n", + (close_test ? "enabled" : "disabled")); + + run_check_subtest(opts, debugger, 1, close_test, &cutoff); + low = 0; + high = MAX_OP_RANGE; + mid = (low + high) / 2; + while (mid != low) { + run_check_subtest(opts, debugger, mid, close_test, + &nresults); + if (nresults > cutoff) + high = mid; + else + low = mid; + mid = (low + high) / 2; + } + /* + * mid is the number of ops that is the crossover point. + * Run some tests near that point to try to trigger weird + * failures. If mid is too low or too high, it indicates + * there is a fundamental problem with the test. + */ + testutil_assert(mid > 1 && mid < MAX_OP_RANGE - 1); + if (opts->verbose) + printf("Retesting around %" PRIu64 " operations.\n", + mid); + + got_failure = false; + got_success = false; + for (nops = mid - 10; nops < mid + 10; nops++) { + for (i = 0; i < TESTS_PER_OP_VALUE; i++) { + run_check_subtest(opts, debugger, nops, + close_test, &nresults); + if (nresults > cutoff) + got_failure = true; + else + got_success = true; + } + } + /* + * Check that it really ran with a crossover point. + */ + testutil_assert(got_failure); + testutil_assert(got_success); +} + +/* + * run_process -- + * Run a program with arguments, wait until it completes. 
+ */ +static int +run_process(TEST_OPTS *opts, const char *prog, char *argv[], int *status) +{ + int pid; + + if (opts->verbose) { + printf("running: "); + for (char **arg = argv; *arg != NULL; arg++) + printf("%s ", *arg); + printf("\n"); + } + if ((pid = fork()) == 0) { + execv(prog, argv); + } else if (pid < 0) + return (errno); + + waitpid(pid, status, 0); + return (0); +} + +/* + * subtest_main -- + * The main program for the subtest + */ +static int +subtest_main(int argc, char *argv[], bool close_test) +{ + TEST_OPTS *opts, _opts; + WT_SESSION *session; + char config[1024], filename[1024]; + + opts = &_opts; + if (testutil_disable_long_tests()) + return (0); + memset(opts, 0, sizeof(*opts)); + + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + /* Redirect stderr, stdout. */ + sprintf(filename, "%s/%s", opts->home, STDERR_FILE); + freopen(filename, "a", stderr); + sprintf(filename, "%s/%s", opts->home, STDOUT_FILE); + freopen(filename, "a", stdout); + snprintf(config, sizeof(config), + "create,cache_size=250M,log=(enabled)," + "transaction_sync=(enabled,method=none),extensions=(" + WT_FAIL_FS_LIB + "=(early_load,config={environment=true,verbose=true})]"); + + testutil_check(wiredtiger_open(opts->home, NULL, config, &opts->conn)); + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &session)); + + testutil_check(session->create(session, "table:subtest", + "key_format=i,value_format=iiiS," + "columns=(id,v0,v1,v2,big)")); + + testutil_check(session->create(session, "table:subtest2", + "key_format=i,value_format=i")); + + testutil_check(session->create(session, "index:subtest:v0", + "columns=(v0)")); + testutil_check(session->create(session, "index:subtest:v1", + "columns=(v1)")); + testutil_check(session->create(session, "index:subtest:v2", + "columns=(v2)")); + + testutil_check(session->close(session, NULL)); + + subtest_populate(opts, close_test); + + testutil_cleanup(opts); + + return (0); +} + 
+/* + * This macro is used as a substitute for testutil_check, except that it is + * aware of when a failure may be expected due to the effects of the fail_fs. + * This macro is used only in subtest_populate(), it uses local variables. + */ +#define CHECK(expr) { \ + int _ret; \ + _ret = expr; \ + if (_ret != 0) { \ + if (!failmode || \ + (_ret != WT_RUN_RECOVERY && _ret != EIO)) { \ + fprintf(stderr, " BAD RETURN %d for \"%s\"\n", \ + _ret, #expr); \ + testutil_check(_ret); \ + } else \ + failed = true; \ + } \ +} + +/* + * subtest_populate -- + * Populate the tables. + */ +static void +subtest_populate(TEST_OPTS *opts, bool close_test) +{ + WT_CURSOR *maincur, *maincur2; + WT_RAND_STATE rnd; + WT_SESSION *session; + uint64_t nrecords; + uint32_t i, rndint; + int key, v0, v1, v2; + char *big, *bigref; + bool failed, failmode; + + failmode = failed = false; + __wt_random_init_seed(NULL, &rnd); + CHECK(create_big_string(&bigref)); + nrecords = opts->nrecords; + + CHECK(opts->conn->open_session( + opts->conn, NULL, NULL, &session)); + + CHECK(session->open_cursor(session, "table:subtest", NULL, + NULL, &maincur)); + + CHECK(session->open_cursor(session, "table:subtest2", NULL, + NULL, &maincur2)); + + for (i = 0; i < nrecords && !failed; i++) { + rndint = __wt_random(&rnd); + generate_key(i, &key); + generate_value(rndint, i, bigref, &v0, &v1, &v2, &big); + CHECK(session->begin_transaction(session, NULL)); + maincur->set_key(maincur, key); + maincur->set_value(maincur, v0, v1, v2, big); + CHECK(maincur->insert(maincur)); + + maincur2->set_key(maincur2, key); + maincur2->set_value(maincur2, rndint); + CHECK(maincur2->insert(maincur2)); + CHECK(session->commit_transaction(session, NULL)); + + if (i == 0) + /* + * Force an initial checkpoint, that helps to + * distinguish a clear failure from just not running + * long enough. 
+ */ + CHECK(session->checkpoint(session, NULL)); + + if ((i + 1) % VERBOSE_PRINT == 0 && opts->verbose) + printf(" %d/%" PRIu64 "\n", (i + 1), nrecords); + /* Attempt to isolate the failures to checkpointing. */ + if (i == (nrecords/100)) { + enable_failures(opts->nops, 1000000); + failmode = true; /* CHECK should expect failures. */ + CHECK(session->checkpoint(session, NULL)); + failmode = false; + disable_failures(); + if (failed && opts->verbose) + printf("checkpoint failed (expected).\n"); + } + } + + /* + * Closing handles after an extreme fail is likely to cause + * cascading failures (or crashes), so recommended practice is + * to immediately exit. We're interested in testing both with + * and without the recommended practice. + */ + if (failed) { + if (!close_test) { + fprintf(stderr, "exit early.\n"); + exit(0); + } else + fprintf(stderr, "closing after failure.\n"); + } + + free(bigref); + CHECK(maincur->close(maincur)); + CHECK(maincur2->close(maincur2)); + CHECK(session->close(session, NULL)); +} + +/* + * main -- + * The main program for the test. When invoked with "subtest" + * argument, run the subtest. Otherwise, run a separate process + * for each needed subtest, and check the results. 
+ */ +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + uint64_t nresults; + const char *debugger; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + debugger = NULL; + + testutil_check(testutil_parse_opts(argc, argv, opts)); + argc -= __wt_optind; + argv += __wt_optind; + if (opts->nrecords == 0) + opts->nrecords = 50000; + + while (argc > 0) { + if (strcmp(argv[0], "subtest") == 0) + return (subtest_main(argc, argv, false)); + else if (strcmp(argv[0], "subtest_close") == 0) + return (subtest_main(argc, argv, true)); + else if (strcmp(argv[0], "gdb") == 0) + debugger = "/usr/bin/gdb"; + else + testutil_assert(false); + argc--; + argv++; + } + if (opts->verbose) { + printf("Number of operations until failure: %" PRIu64 + " (change with -o N)\n", opts->nops); + printf("Number of records: %" PRIu64 + " (change with -n N)\n", opts->nrecords); + } + if (opts->nops == 0) { + run_check_subtest_range(opts, debugger, false); + run_check_subtest_range(opts, debugger, true); + } else + run_check_subtest(opts, debugger, opts->nops, + opts->nrecords, &nresults); + + testutil_clean_work_dir(opts->home); + testutil_cleanup(opts); + + return (0); +} diff --git a/test/utility/misc.c b/test/utility/misc.c index 1491c9a6938..1ba08ddd77f 100644 --- a/test/utility/misc.c +++ b/test/utility/misc.c @@ -78,7 +78,7 @@ testutil_work_dir_from_path(char *buffer, size_t len, const char *dir) * Remove the work directory. 
*/ void -testutil_clean_work_dir(char *dir) +testutil_clean_work_dir(const char *dir) { size_t len; int ret; diff --git a/test/utility/test_util.h b/test/utility/test_util.h index f6a9cd68e02..489bbe18d87 100644 --- a/test/utility/test_util.h +++ b/test/utility/test_util.h @@ -183,7 +183,7 @@ void *dmalloc(size_t); void *drealloc(void *, size_t); void *dstrdup(const void *); void *dstrndup(const char *, size_t); -void testutil_clean_work_dir(char *); +void testutil_clean_work_dir(const char *); void testutil_cleanup(TEST_OPTS *); bool testutil_disable_long_tests(void); void testutil_make_work_dir(char *); -- cgit v1.2.1 From e66634960eeaf60d1b13c26308053e0baf51030b Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 14 Feb 2017 08:36:14 -0500 Subject: WT-2909 Create automatable test verifying checkpoint integrity after errors (#3295) * WT-2909 Create automatable test verifying checkpoint integrity after errors Make gcc 4.7 work again. * Linux (Red Hat 5.3.1-6) declares backtrace(3) to return an int, FreeBSD (10.3-RELEASE-p11) declares it to return a size_t. * Remove repeated #include files, check for error returns from a few functions. * The Linux/FreeBSD backtrace() calls are fundamentally incompatible, add an #ifdef. 
--- ext/test/fail_fs/fail_fs.c | 13 +++++++--- test/csuite/wt2909_checkpoint_integrity/main.c | 33 +++++++++++++------------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/ext/test/fail_fs/fail_fs.c b/ext/test/fail_fs/fail_fs.c index a6376ce203b..0ea4a7d5e00 100644 --- a/ext/test/fail_fs/fail_fs.c +++ b/ext/test/fail_fs/fail_fs.c @@ -95,8 +95,7 @@ static void fail_file_handle_remove(WT_SESSION *, FAIL_FILE_HANDLE *); static int fail_file_lock(WT_FILE_HANDLE *, WT_SESSION *, bool); static int fail_file_read( WT_FILE_HANDLE *, WT_SESSION *, wt_off_t, size_t, void *); -static int fail_file_size( - WT_FILE_HANDLE *, WT_SESSION *, wt_off_t *); +static int fail_file_size(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t *); static int fail_file_sync(WT_FILE_HANDLE *, WT_SESSION *); static int fail_file_truncate(WT_FILE_HANDLE *, WT_SESSION *, wt_off_t); static int fail_file_write( @@ -676,7 +675,11 @@ fail_fs_simulate_fail(FAIL_FILE_HANDLE *fail_fh, WT_SESSION *session, { FAIL_FILE_SYSTEM *fail_fs; WT_EXTENSION_API *wtext; +#ifdef __linux__ int btret, i; +#else + size_t btret, i; +#endif void *bt[100]; char **btstr; @@ -686,7 +689,11 @@ fail_fs_simulate_fail(FAIL_FILE_HANDLE *fail_fh, WT_SESSION *session, (void)wtext->msg_printf(wtext, session, "fail_fs: %s: simulated failure after %" PRId64 " %s operations\n", fail_fh->iface.name, nops, opkind); - btret = backtrace(bt, sizeof(bt)/sizeof(bt[0])); +#ifdef __linux__ + btret = backtrace(bt, (int)(sizeof(bt) / sizeof(bt[0]))); +#else + btret = backtrace(bt, sizeof(bt) / sizeof(bt[0])); +#endif if ((btstr = backtrace_symbols(bt, btret)) != NULL) { for (i = 0; i < btret; i++) (void)wtext->msg_printf(wtext, session, " %s", diff --git a/test/csuite/wt2909_checkpoint_integrity/main.c b/test/csuite/wt2909_checkpoint_integrity/main.c index efc459ff271..bf7f86cfd07 100644 --- a/test/csuite/wt2909_checkpoint_integrity/main.c +++ b/test/csuite/wt2909_checkpoint_integrity/main.c @@ -27,8 +27,6 @@ */ #include "test_util.h" 
-#include -#include #include /* @@ -243,7 +241,7 @@ cursor_count_items(WT_CURSOR *cursor, uint64_t *countp) *countp = 0; - cursor->reset(cursor); + testutil_check(cursor->reset(cursor)); while ((ret = cursor->next(cursor)) == 0) (*countp)++; testutil_assert(ret == WT_NOTFOUND); @@ -256,7 +254,7 @@ cursor_count_items(WT_CURSOR *cursor, uint64_t *countp) static void disable_failures(void) { - setenv("WT_FAIL_FS_ENABLE", "0", 1); + testutil_check(setenv("WT_FAIL_FS_ENABLE", "0", 1)); } /* @@ -268,11 +266,11 @@ enable_failures(uint64_t allow_writes, uint64_t allow_reads) { char value[100]; - setenv("WT_FAIL_FS_ENABLE", "1", 1); + testutil_check(setenv("WT_FAIL_FS_ENABLE", "1", 1)); snprintf(value, sizeof(value), "%" PRIu64, allow_writes); - setenv("WT_FAIL_FS_WRITE_ALLOW", value, 1); + testutil_check(setenv("WT_FAIL_FS_WRITE_ALLOW", value, 1)); snprintf(value, sizeof(value), "%" PRIu64, allow_reads); - setenv("WT_FAIL_FS_READ_ALLOW", value, 1); + testutil_check(setenv("WT_FAIL_FS_READ_ALLOW", value, 1)); } /* @@ -307,10 +305,8 @@ static void run_check_subtest(TEST_OPTS *opts, const char *debugger, uint64_t nops, bool close_test, uint64_t *nresultsp) { - int narg; - int estatus; - char rarg[20], sarg[20]; - char *subtest_args[MAX_ARGS]; + int estatus, narg; + char rarg[20], sarg[20], *subtest_args[MAX_ARGS]; narg = 0; if (debugger != NULL) { @@ -427,19 +423,21 @@ static int run_process(TEST_OPTS *opts, const char *prog, char *argv[], int *status) { int pid; + char **arg; if (opts->verbose) { printf("running: "); - for (char **arg = argv; *arg != NULL; arg++) + for (arg = argv; *arg != NULL; arg++) printf("%s ", *arg); printf("\n"); } if ((pid = fork()) == 0) { - execv(prog, argv); + (void)execv(prog, argv); + testutil_die(errno, "%s", prog); } else if (pid < 0) return (errno); - waitpid(pid, status, 0); + (void)waitpid(pid, status, 0); return (0); } @@ -464,9 +462,9 @@ subtest_main(int argc, char *argv[], bool close_test) /* Redirect stderr, stdout. 
*/ sprintf(filename, "%s/%s", opts->home, STDERR_FILE); - freopen(filename, "a", stderr); + testutil_assert(freopen(filename, "a", stderr) != NULL); sprintf(filename, "%s/%s", opts->home, STDOUT_FILE); - freopen(filename, "a", stdout); + testutil_assert(freopen(filename, "a", stdout) != NULL); snprintf(config, sizeof(config), "create,cache_size=250M,log=(enabled)," "transaction_sync=(enabled,method=none),extensions=(" @@ -572,7 +570,8 @@ subtest_populate(TEST_OPTS *opts, bool close_test) CHECK(session->checkpoint(session, NULL)); if ((i + 1) % VERBOSE_PRINT == 0 && opts->verbose) - printf(" %d/%" PRIu64 "\n", (i + 1), nrecords); + printf(" %" PRIu32 "/%" PRIu64 "\n", + (i + 1), nrecords); /* Attempt to isolate the failures to checkpointing. */ if (i == (nrecords/100)) { enable_failures(opts->nops, 1000000); -- cgit v1.2.1 From 152d4778f58fe8d9448c530c7cda07801499e8d7 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 14 Feb 2017 10:57:35 -0500 Subject: WT-2909 Create automatable test verifying checkpoint integrity after errors (#3296) FreeBSD's backtrace is the outlier, everybody else (OS X, Solaris, Linux) is using int types, not size_t. 
--- ext/test/fail_fs/fail_fs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ext/test/fail_fs/fail_fs.c b/ext/test/fail_fs/fail_fs.c index 0ea4a7d5e00..9445dbf9aca 100644 --- a/ext/test/fail_fs/fail_fs.c +++ b/ext/test/fail_fs/fail_fs.c @@ -675,10 +675,10 @@ fail_fs_simulate_fail(FAIL_FILE_HANDLE *fail_fh, WT_SESSION *session, { FAIL_FILE_SYSTEM *fail_fs; WT_EXTENSION_API *wtext; -#ifdef __linux__ - int btret, i; -#else +#ifdef __FreeBSD__ size_t btret, i; +#else + int btret, i; #endif void *bt[100]; char **btstr; @@ -689,10 +689,10 @@ fail_fs_simulate_fail(FAIL_FILE_HANDLE *fail_fh, WT_SESSION *session, (void)wtext->msg_printf(wtext, session, "fail_fs: %s: simulated failure after %" PRId64 " %s operations\n", fail_fh->iface.name, nops, opkind); -#ifdef __linux__ - btret = backtrace(bt, (int)(sizeof(bt) / sizeof(bt[0]))); -#else +#ifdef __FreeBSD__ btret = backtrace(bt, sizeof(bt) / sizeof(bt[0])); +#else + btret = backtrace(bt, (int)(sizeof(bt) / sizeof(bt[0]))); #endif if ((btstr = backtrace_symbols(bt, btret)) != NULL) { for (i = 0; i < btret; i++) -- cgit v1.2.1 From a6a0483f2b4f1617bc1aa1179685b74bad990290 Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Tue, 14 Feb 2017 14:30:51 -0500 Subject: WT-3180 bug fix: disable long tests in the top-level main program, (#3298) rather than the subtest. Disable core files for the subtest, as they are rarely interesting. Fix some uint64 values/parameters that were declared as uint32. 
--- test/csuite/wt2909_checkpoint_integrity/main.c | 29 ++++++++++++++++---------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/test/csuite/wt2909_checkpoint_integrity/main.c b/test/csuite/wt2909_checkpoint_integrity/main.c index bf7f86cfd07..ddf249fb406 100644 --- a/test/csuite/wt2909_checkpoint_integrity/main.c +++ b/test/csuite/wt2909_checkpoint_integrity/main.c @@ -27,6 +27,8 @@ */ #include "test_util.h" +#include +#include #include /* @@ -87,8 +89,8 @@ static int create_big_string(char **); static void cursor_count_items(WT_CURSOR *, uint64_t *); static void disable_failures(void); static void enable_failures(uint64_t, uint64_t); -static void generate_key(uint32_t, int *); -static void generate_value(uint32_t, uint32_t, char *, int *, int *, int *, +static void generate_key(uint64_t, int *); +static void generate_value(uint32_t, uint64_t, char *, int *, int *, int *, char **); static void run_check_subtest(TEST_OPTS *, const char *, uint64_t, bool, uint64_t *); @@ -140,9 +142,8 @@ check_results(TEST_OPTS *opts, uint64_t *foundp) testutil_check(maincur2->get_key(maincur2, &key_got)); testutil_check(maincur2->get_value(maincur2, &rndint)); - generate_key((uint32_t)count, &key); - generate_value(rndint, (uint32_t)count, - bigref, &v0, &v1, &v2, &big); + generate_key(count, &key); + generate_value(rndint, count, bigref, &v0, &v1, &v2, &big); testutil_assert(key == key_got); /* Check the key/values in main table. */ @@ -278,7 +279,7 @@ enable_failures(uint64_t allow_writes, uint64_t allow_reads) * Generate a key used by the "subtest" and "subtest2" tables. */ static void -generate_key(uint32_t i, int *keyp) +generate_key(uint64_t i, int *keyp) { *keyp = (int)i; } @@ -288,7 +289,7 @@ generate_key(uint32_t i, int *keyp) * Generate values for the "subtest" table. 
*/ static void -generate_value(uint32_t rndint, uint32_t i, char *bigref, +generate_value(uint32_t rndint, uint64_t i, char *bigref, int *v0p, int *v1p, int *v2p, char **bigp) { *v0p = (int)(i * 7); @@ -451,12 +452,16 @@ subtest_main(int argc, char *argv[], bool close_test) TEST_OPTS *opts, _opts; WT_SESSION *session; char config[1024], filename[1024]; + struct rlimit rlim; - opts = &_opts; if (testutil_disable_long_tests()) return (0); + opts = &_opts; memset(opts, 0, sizeof(*opts)); + memset(&rlim, 0, sizeof(rlim)); + /* No core files during fault injection tests. */ + testutil_check(setrlimit(RLIMIT_CORE, &rlim)); testutil_check(testutil_parse_opts(argc, argv, opts)); testutil_make_work_dir(opts->home); @@ -527,8 +532,8 @@ subtest_populate(TEST_OPTS *opts, bool close_test) WT_CURSOR *maincur, *maincur2; WT_RAND_STATE rnd; WT_SESSION *session; - uint64_t nrecords; - uint32_t i, rndint; + uint64_t i, nrecords; + uint32_t rndint; int key, v0, v1, v2; char *big, *bigref; bool failed, failmode; @@ -570,7 +575,7 @@ subtest_populate(TEST_OPTS *opts, bool close_test) CHECK(session->checkpoint(session, NULL)); if ((i + 1) % VERBOSE_PRINT == 0 && opts->verbose) - printf(" %" PRIu32 "/%" PRIu64 "\n", + printf(" %" PRIu64 "/%" PRIu64 "\n", (i + 1), nrecords); /* Attempt to isolate the failures to checkpointing. */ if (i == (nrecords/100)) { @@ -617,6 +622,8 @@ main(int argc, char *argv[]) uint64_t nresults; const char *debugger; + if (testutil_disable_long_tests()) + return (0); opts = &_opts; memset(opts, 0, sizeof(*opts)); debugger = NULL; -- cgit v1.2.1 From a53bb9683b7f8e4fda3c6272ec8224857e756ba8 Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Tue, 14 Feb 2017 16:30:53 -0500 Subject: WT-3179 test bug: clang sanitizer failure in fail_fs #3300 hold the fs lock while manipulating the list of file handles. 
--- ext/test/fail_fs/fail_fs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ext/test/fail_fs/fail_fs.c b/ext/test/fail_fs/fail_fs.c index 9445dbf9aca..cb87b43bfd9 100644 --- a/ext/test/fail_fs/fail_fs.c +++ b/ext/test/fail_fs/fail_fs.c @@ -156,11 +156,13 @@ static int fail_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *session) { FAIL_FILE_HANDLE *fail_fh; + FAIL_FILE_SYSTEM *fail_fs; int ret; (void)session; /* Unused */ fail_fh = (FAIL_FILE_HANDLE *)file_handle; + fail_fs = fail_fh->fail_fs; /* * We don't actually open an fd when opening directories for flushing, @@ -170,14 +172,16 @@ fail_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *session) return (0); ret = close(fail_fh->fd); fail_fh->fd = -1; + fail_fs_lock(&fail_fs->lock); fail_file_handle_remove(session, fail_fh); + fail_fs_unlock(&fail_fs->lock); return (ret); } /* * fail_file_handle_remove -- * Destroy an in-memory file handle. Should only happen on remove or - * shutdown. + * shutdown. The file system lock must be held during this call. */ static void fail_file_handle_remove(WT_SESSION *session, FAIL_FILE_HANDLE *fail_fh) -- cgit v1.2.1 From 7a725a97d281095280515b0609f0e61747fd1b58 Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Tue, 14 Feb 2017 17:21:07 -0500 Subject: WT-3179 test bug: clang sanitizer failure in fail_fs Replaced a fprintf call, and cleaned up a call to access system call. 
--- ext/test/fail_fs/fail_fs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/ext/test/fail_fs/fail_fs.c b/ext/test/fail_fs/fail_fs.c index cb87b43bfd9..d0d8a14c8c2 100644 --- a/ext/test/fail_fs/fail_fs.c +++ b/ext/test/fail_fs/fail_fs.c @@ -536,7 +536,7 @@ fail_fs_exist(WT_FILE_SYSTEM *file_system, (void)file_system; /* Unused */ (void)session; /* Unused */ - *existp = (access(name, 0) == 0); + *existp = (access(name, F_OK) == 0); return (0); } @@ -551,6 +551,7 @@ fail_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, { FAIL_FILE_HANDLE *fail_fh; FAIL_FILE_SYSTEM *fail_fs; + WT_EXTENSION_API *wtext; WT_FILE_HANDLE *file_handle; int fd, open_flags, ret; @@ -563,8 +564,11 @@ fail_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session, fd = -1; ret = 0; - if (fail_fs->verbose) - fprintf(stderr, "fail_fs: open: %s\n", name); + if (fail_fs->verbose) { + wtext = fail_fs->wtext; + (void)wtext->msg_printf(wtext, session, "fail_fs: open: %s", + name); + } fail_fs_lock(&fail_fs->lock); @@ -692,7 +696,7 @@ fail_fs_simulate_fail(FAIL_FILE_HANDLE *fail_fh, WT_SESSION *session, wtext = fail_fs->wtext; (void)wtext->msg_printf(wtext, session, "fail_fs: %s: simulated failure after %" PRId64 - " %s operations\n", fail_fh->iface.name, nops, opkind); + " %s operations", fail_fh->iface.name, nops, opkind); #ifdef __FreeBSD__ btret = backtrace(bt, sizeof(bt) / sizeof(bt[0])); #else -- cgit v1.2.1 From 70b5ab64d84cb8a22553def853ddb1a11393ff73 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 15 Feb 2017 18:08:10 +1100 Subject: WT-3149 Make random lookups for eviction more lightweight. (#3302) Eviction walks don't need to start on leaf pages: just try to descend through the tree and as soon as we can't swap to a child page, start the walk from the parent. 
--- src/btree/bt_random.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/btree/bt_random.c b/src/btree/bt_random.c index 3cc6838c4c8..44de511f787 100644 --- a/src/btree/bt_random.c +++ b/src/btree/bt_random.c @@ -166,7 +166,7 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) /* * __wt_random_descent -- - * Find a random leaf page in a tree. + * Find a random page in a tree for either sampling or eviction. */ int __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, bool eviction) @@ -183,9 +183,11 @@ __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, bool eviction) retry = 100; /* Eviction should not be tapped to do eviction. */ - flags = WT_READ_RESTART_OK; if (eviction) - LF_SET(WT_READ_NO_EVICT); + flags = WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | + WT_READ_NO_WAIT | WT_READ_NOTFOUND_OK | WT_READ_RESTART_OK; + else + flags = WT_READ_RESTART_OK; if (0) { restart: /* @@ -205,6 +207,13 @@ restart: /* WT_INTL_INDEX_GET(session, page, pindex); entries = pindex->entries; + /* Eviction just wants any random child. */ + if (eviction) { + descent = pindex->index[ + __wt_random(&session->rnd) % entries]; + goto descend; + } + /* * There may be empty pages in the tree, and they're useless to * us. If we don't find a non-empty page in "entries" random @@ -212,10 +221,8 @@ restart: /* * search page contains nothing other than empty pages, restart * from the root some number of times before giving up. * - * Eviction is only looking for a place in the cache and so only - * wants in-memory pages (but a deleted page is fine); currently - * our other caller is looking for a key/value pair on a random - * leave page, and so will accept any page that contains a valid + * Random sampling is looking for a key/value pair on a random + * leaf page, and so will accept any page that contains a valid * key/value pair, so on-disk is fine, but deleted is not. 
*/ descent = NULL; @@ -223,15 +230,14 @@ restart: /* descent = pindex->index[__wt_random(&session->rnd) % entries]; if (descent->state == WT_REF_MEM || - (!eviction && descent->state == WT_REF_DISK)) + descent->state == WT_REF_DISK) break; } if (i == entries) for (i = 0; i < entries; ++i) { descent = pindex->index[i]; if (descent->state == WT_REF_MEM || - (!eviction && - descent->state == WT_REF_DISK)) + descent->state == WT_REF_DISK) break; } if (i == entries || descent == NULL) { @@ -249,17 +255,25 @@ restart: /* * On other error, simply return, the swap call ensures we're * holding nothing on failure. */ - if ((ret = +descend: if ((ret = __wt_page_swap(session, current, descent, flags)) == 0) { current = descent; continue; } + if (eviction && (ret == WT_NOTFOUND || ret == WT_RESTART)) + break; if (ret == WT_RESTART) goto restart; return (ret); } - *refp = current; + /* + * There is no point starting with the root page: the walk will exit + * immediately. In that case we aren't holding a hazard pointer so + * there is nothing to release. + */ + if (!eviction || !__wt_ref_is_root(current)) + *refp = current; return (0); } -- cgit v1.2.1 From 83ce29217f0bebad1c0a86e4eb827a70216b4641 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Wed, 15 Feb 2017 16:38:07 -0500 Subject: WT-3186 Fix error path and panic detection in logging loops. 
(#3304) --- src/include/extern.h | 2 +- src/log/log.c | 6 +++++- src/log/log_slot.c | 5 ++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/include/extern.h b/src/include/extern.h index 8e55077c2a9..19ad9a880df 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -406,7 +406,7 @@ extern int __wt_log_slot_switch( WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bo extern int __wt_log_slot_new(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int64_t __wt_log_slot_release(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, int64_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_clsm_request_switch(WT_CURSOR_LSM *clsm) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/log/log.c b/src/log/log.c index b07ef8c1bd5..d6caa55f8c7 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -2132,7 +2132,11 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, WT_STAT_CONN_INCR(session, log_writes); - __wt_log_slot_join(session, rdup_len, flags, &myslot); + /* + * 
The only time joining a slot should ever return an error is if it + * detects a panic. + */ + WT_ERR(__wt_log_slot_join(session, rdup_len, flags, &myslot)); /* * If the addition of this record crosses the buffer boundary, * switch in a new slot. diff --git a/src/log/log_slot.c b/src/log/log_slot.c index d6e692f8c51..542f010ea53 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -160,6 +160,7 @@ retry: #endif if (WT_LOG_SLOT_UNBUFFERED_ISSET(old_state)) { while (slot->slot_unbuffered == 0) { + WT_RET(WT_SESSION_CHECK_PANIC(session)); __wt_yield(); #ifdef HAVE_DIAGNOSTIC ++count; @@ -464,7 +465,7 @@ __wt_log_slot_destroy(WT_SESSION_IMPL *session) * __wt_log_slot_join -- * Join a consolidated logging slot. */ -void +int __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot) { @@ -498,6 +499,7 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, } for (;;) { WT_BARRIER(); + WT_RET(WT_SESSION_CHECK_PANIC(session)); slot = log->active_slot; old_state = slot->slot_state; if (WT_LOG_SLOT_OPEN(old_state)) { @@ -555,6 +557,7 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, myslot->slot = slot; myslot->offset = join_offset; myslot->end_offset = (wt_off_t)((uint64_t)join_offset + mysize); + return (0); } /* -- cgit v1.2.1 From 8a1adcc4a1c4c25e1270290a8eb21173f41e83a9 Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Thu, 16 Feb 2017 00:21:26 -0500 Subject: WT-3184 bug fix: special case searching an index that has a custom collator. (#3303) In this case, we must use the entire (raw) key to duplicate the position, instead of truncating to the visible part. 
--- src/cursor/cur_index.c | 3 +- src/cursor/cur_std.c | 7 +- src/include/wiredtiger.in | 5 +- test/csuite/Makefile.am | 3 + test/csuite/wt3184_dup_index_collator/main.c | 168 +++++++++++++++++++++++++++ 5 files changed, 181 insertions(+), 5 deletions(-) create mode 100644 test/csuite/wt3184_dup_index_collator/main.c diff --git a/src/cursor/cur_index.c b/src/cursor/cur_index.c index 13180efdea4..6fc01c0421f 100644 --- a/src/cursor/cur_index.c +++ b/src/cursor/cur_index.c @@ -245,7 +245,8 @@ __curindex_search(WT_CURSOR *cursor) * Custom collators expect to see complete keys, pass an item containing * all the visible fields so it unpacks correctly. */ - if (cindex->index->collator != NULL) + if (cindex->index->collator != NULL && + !F_ISSET(cursor, WT_CURSTD_RAW_SEARCH)) WT_ERR(__wt_struct_repack(session, child->key_format, cindex->iface.key_format, &child->key, &found_key)); else diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c index 7ace6d49cf0..99a9e373354 100644 --- a/src/cursor/cur_std.c +++ b/src/cursor/cur_std.c @@ -633,6 +633,7 @@ __wt_cursor_reconfigure(WT_CURSOR *cursor, const char *config) int __wt_cursor_dup_position(WT_CURSOR *to_dup, WT_CURSOR *cursor) { + WT_DECL_RET; WT_ITEM key; /* @@ -662,9 +663,11 @@ __wt_cursor_dup_position(WT_CURSOR *to_dup, WT_CURSOR *cursor) * cursors cannot reference application memory after cursor operations * and that requirement will save the day. */ - WT_RET(cursor->search(cursor)); + F_SET(cursor, WT_CURSTD_RAW_SEARCH); + ret = cursor->search(cursor); + F_CLR(cursor, WT_CURSTD_RAW_SEARCH); - return (0); + return (ret); } /* diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index d1e3d383396..c148e759299 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -576,8 +576,9 @@ struct __wt_cursor { #define WT_CURSTD_OPEN 0x00200 #define WT_CURSTD_OVERWRITE 0x00400 #define WT_CURSTD_RAW 0x00800 -#define WT_CURSTD_VALUE_EXT 0x01000 /* Value points out of the tree. 
*/ -#define WT_CURSTD_VALUE_INT 0x02000 /* Value points into the tree. */ +#define WT_CURSTD_RAW_SEARCH 0x01000 +#define WT_CURSTD_VALUE_EXT 0x02000 /* Value points out of the tree. */ +#define WT_CURSTD_VALUE_INT 0x04000 /* Value points into the tree. */ #define WT_CURSTD_VALUE_SET (WT_CURSTD_VALUE_EXT | WT_CURSTD_VALUE_INT) uint32_t flags; #endif diff --git a/test/csuite/Makefile.am b/test/csuite/Makefile.am index 0158d0c96d1..e2b72532703 100644 --- a/test/csuite/Makefile.am +++ b/test/csuite/Makefile.am @@ -49,6 +49,9 @@ noinst_PROGRAMS += test_wt3120_filesys test_wt3135_search_near_collator_SOURCES = wt3135_search_near_collator/main.c noinst_PROGRAMS += test_wt3135_search_near_collator +test_wt3184_dup_index_collator_SOURCES = wt3184_dup_index_collator/main.c +noinst_PROGRAMS += test_wt3184_dup_index_collator + # Run this during a "make check" smoke test. TESTS = $(noinst_PROGRAMS) LOG_COMPILER = $(TEST_WRAPPER) diff --git a/test/csuite/wt3184_dup_index_collator/main.c b/test/csuite/wt3184_dup_index_collator/main.c new file mode 100644 index 00000000000..bcefd2f1a3b --- /dev/null +++ b/test/csuite/wt3184_dup_index_collator/main.c @@ -0,0 +1,168 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. 
We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +/* + * JIRA ticket reference: WT-3184 + * Test case description: Each set of data is ordered and contains + * five elements (0-4). We insert elements 1 and 3, and then do + * search_near and search for each element. For each set of data, we perform + * these tests first using a custom collator, and second using a custom collator + * and extractor. In each case there are index keys having variable length. + * Failure mode: In the reported test case, the custom compare routine is + * given a truncated key to compare, and the unpack functions return errors + * because the truncation appeared in the middle of a key. + */ + +static int +compare_int(int32_t a, int32_t b) +{ + return (a < b ? -1 : (a > b ? 
1 : 0)); +} + +static int32_t +item_to_int(WT_ITEM *item) +{ + testutil_assert(item->size == sizeof(int32_t)); + return (*(int32_t *)item->data); +} + +static int +compare_int_items(WT_ITEM *itema, WT_ITEM *itemb) +{ + testutil_assert(itema->size == sizeof(int32_t)); + testutil_assert(itemb->size == sizeof(int32_t)); + return (compare_int(item_to_int(itema), item_to_int(itemb))); +} + +static void +print_int_item(const char *str, const WT_ITEM *item) +{ + if (item->size > 0) { + testutil_assert(item->size == sizeof(int32_t)); + printf("%s%" PRId32, str, *(int32_t *)item->data); + } else + printf("%s", str); +} + +static int +index_compare(WT_COLLATOR *collator, WT_SESSION *session, + const WT_ITEM *key1, const WT_ITEM *key2, int *cmp) +{ + WT_ITEM ikey1, pkey1, ikey2, pkey2; + + (void)collator; + testutil_check(wiredtiger_struct_unpack(session, + key1->data, key1->size, "uu", &ikey1, &pkey1)); + testutil_check(wiredtiger_struct_unpack(session, + key2->data, key2->size, "uu", &ikey2, &pkey2)); + + print_int_item("index_compare: index key1 = ", &ikey1); + print_int_item(", primary key1 = ", &pkey1); + print_int_item(", index key2 = ", &ikey2); + print_int_item(", primary key2 = ", &pkey2); + printf("\n"); + + if ((*cmp = compare_int_items(&ikey1, &ikey2)) != 0) + return (0); + + if (pkey1.size != 0 && pkey2.size != 0) + *cmp = compare_int_items(&pkey1, &pkey2); + else if (pkey1.size != 0) + *cmp = 1; + else if (pkey2.size != 0) + *cmp = -1; + else + *cmp = 0; + + return (0); +} + +static WT_COLLATOR index_coll = { index_compare, NULL, NULL }; + +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + WT_CURSOR *cursor, *cursor1; + WT_ITEM got, k, v; + WT_SESSION *session; + int32_t ki, vi; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + testutil_check(wiredtiger_open(opts->home, NULL, "create", + &opts->conn)); + testutil_check( + 
opts->conn->open_session(opts->conn, NULL, NULL, &session)); + + testutil_check(opts->conn->add_collator(opts->conn, "index_coll", + &index_coll, NULL)); + + testutil_check(session->create(session, + "table:main", "key_format=u,value_format=u,columns=(k,v)")); + testutil_check(session->create(session, + "index:main:index", "columns=(v),collator=index_coll")); + + printf("adding new record\n"); + testutil_check(session->open_cursor(session, "table:main", NULL, NULL, + &cursor)); + + ki = 13; + vi = 17; + + k.data = &ki; k.size = sizeof(ki); + v.data = &vi; v.size = sizeof(vi); + + cursor->set_key(cursor, &k); + cursor->set_value(cursor, &v); + testutil_check(cursor->insert(cursor)); + testutil_check(cursor->close(cursor)); + + printf("positioning index cursor\n"); + + testutil_check(session->open_cursor(session, "index:main:index", NULL, + NULL, &cursor)); + cursor->set_key(cursor, &v); + testutil_check(cursor->search(cursor)); + + printf("duplicating cursor\n"); + testutil_check(session->open_cursor(session, NULL, cursor, NULL, + &cursor1)); + cursor->get_value(cursor, &got); + testutil_assert(item_to_int(&got) == 17); + cursor1->get_value(cursor1, &got); + testutil_assert(item_to_int(&got) == 17); + + testutil_check(session->close(session, NULL)); + testutil_cleanup(opts); + return (EXIT_SUCCESS); +} -- cgit v1.2.1 From 30036d415f83b4b376750bcc122ff8f43b829205 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Thu, 16 Feb 2017 21:38:32 -0500 Subject: WT-3188 More log loops needing to check panic. (#3307) --- src/log/log.c | 19 ++++++++++++++----- src/log/log_slot.c | 2 ++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/log/log.c b/src/log/log.c index d6caa55f8c7..3477ca52502 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -24,7 +24,7 @@ static int __log_write_internal( * __log_wait_for_earlier_slot -- * Wait for write_lsn to catch up to this slot. 
*/ -static void +static int __log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) { WT_CONNECTION_IMPL *conn; @@ -41,6 +41,7 @@ __log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) * unlock in case an earlier thread is trying to switch its * slot and complete its operation. */ + WT_RET(WT_SESSION_CHECK_PANIC(session)); if (F_ISSET(session, WT_SESSION_LOCKED_SLOT)) __wt_spin_unlock(session, &log->log_slot_lock); __wt_cond_signal(session, conn->log_wrlsn_cond); @@ -51,6 +52,7 @@ __log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) if (F_ISSET(session, WT_SESSION_LOCKED_SLOT)) __wt_spin_lock(session, &log->log_slot_lock); } + return (0); } /* @@ -70,7 +72,7 @@ __log_fs_write(WT_SESSION_IMPL *session, * be a hole at the end of the previous log file that we cannot detect. */ if (slot->slot_release_lsn.l.file < slot->slot_start_lsn.l.file) { - __log_wait_for_earlier_slot(session, slot); + WT_RET(__log_wait_for_earlier_slot(session, slot)); WT_RET(__wt_log_force_sync(session, &slot->slot_release_lsn)); } if ((ret = __wt_write(session, slot->slot_fh, offset, len, buf)) != 0) @@ -110,6 +112,7 @@ __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) conn = S2C(session); log = conn->log; + WT_RET(WT_SESSION_CHECK_PANIC(session)); WT_RET(__wt_log_force_write(session, 1, NULL)); __wt_log_wrlsn(session, NULL); if (start) @@ -174,6 +177,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) * log file ready to close. */ while (log->sync_lsn.l.file < min_lsn->l.file) { + WT_RET(WT_SESSION_CHECK_PANIC(session)); __wt_cond_signal(session, S2C(session)->log_file_cond); __wt_cond_wait(session, log->log_sync_cond, 10000, NULL); } @@ -1467,7 +1471,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) * be holes in the log file. 
*/ WT_STAT_CONN_INCR(session, log_release_write_lsn); - __log_wait_for_earlier_slot(session, slot); + WT_ERR(__log_wait_for_earlier_slot(session, slot)); log->write_start_lsn = slot->slot_start_lsn; log->write_lsn = slot->slot_end_lsn; @@ -1488,6 +1492,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) * current fsync completes and advance log->sync_lsn. */ while (F_ISSET(slot, WT_SLOT_SYNC | WT_SLOT_SYNC_DIR)) { + WT_ERR(WT_SESSION_CHECK_PANIC(session)); /* * We have to wait until earlier log files have finished their * sync operations. The most recent one will set the LSN to the @@ -2178,15 +2183,19 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, if (LF_ISSET(WT_LOG_FLUSH)) { /* Wait for our writes to reach the OS */ while (__wt_log_cmp(&log->write_lsn, &lsn) <= 0 && - myslot.slot->slot_error == 0) + myslot.slot->slot_error == 0) { + WT_ERR(WT_SESSION_CHECK_PANIC(session)); __wt_cond_wait( session, log->log_write_cond, 10000, NULL); + } } else if (LF_ISSET(WT_LOG_FSYNC)) { /* Wait for our writes to reach disk */ while (__wt_log_cmp(&log->sync_lsn, &lsn) <= 0 && - myslot.slot->slot_error == 0) + myslot.slot->slot_error == 0) { + WT_ERR(WT_SESSION_CHECK_PANIC(session)); __wt_cond_wait( session, log->log_sync_cond, 10000, NULL); + } } /* diff --git a/src/log/log_slot.c b/src/log/log_slot.c index 542f010ea53..b4655ff6c1a 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -220,6 +220,7 @@ __log_slot_switch_internal( if (slot != log->active_slot) return (0); + WT_RET(WT_SESSION_CHECK_PANIC(session)); /* * We may come through here multiple times if we were able to close * a slot but could not set up a new one. If we closed it already, @@ -582,6 +583,7 @@ __wt_log_slot_release(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, int64_t size) * was written rather than the beginning record of the slot. 
*/ while ((cur_offset = slot->slot_last_offset) < my_start) { + WT_RET(WT_SESSION_CHECK_PANIC(session)); /* * Set our offset if we are larger. */ -- cgit v1.2.1 From db4cfede16a49dfca37303e713ddb171c041a6b9 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Sat, 18 Feb 2017 08:53:32 +1100 Subject: WT-3187 Ramp up aggressiveness in reducing cache pool usage (#3306) * WT-3187 Ramp up aggressiveness in reducing cache pool usage We could get into situations where no participants looked like good candidates. Also put a failsafe into the balance loop, to ensure future failures to reduce usage won't lead to hang on shutdown. * KNF and wordsmithing. --- src/conn/conn_cache_pool.c | 56 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c index 49b766f4602..ed078991581 100644 --- a/src/conn/conn_cache_pool.c +++ b/src/conn/conn_cache_pool.c @@ -418,8 +418,9 @@ static void __cache_pool_balance(WT_SESSION_IMPL *session, bool forward) { WT_CACHE_POOL *cp; - bool adjusted; uint64_t bump_threshold, highest; + int i; + bool adjusted; cp = __wt_process.cache_pool; adjusted = false; @@ -438,11 +439,17 @@ __cache_pool_balance(WT_SESSION_IMPL *session, bool forward) /* * Actively attempt to: - * - Reduce the amount allocated, if we are over the budget + * - Reduce the amount allocated, if we are over the budget. * - Increase the amount used if there is capacity and any pressure. + * Don't keep trying indefinitely, if we aren't succeeding in reducing + * the cache in use re-assessing the participants' states is necessary. + * We are also holding a lock across this process, which can slow + * participant shutdown if we spend a long time balancing. 
*/ - while (F_ISSET(cp, WT_CACHE_POOL_ACTIVE) && - F_ISSET(S2C(session)->cache, WT_CACHE_POOL_RUN)) { + for (i = 0; + i < 2 * WT_CACHE_POOL_BUMP_THRESHOLD && + F_ISSET(cp, WT_CACHE_POOL_ACTIVE) && + F_ISSET(S2C(session)->cache, WT_CACHE_POOL_RUN); i++) { __cache_pool_adjust( session, highest, bump_threshold, forward, &adjusted); /* @@ -565,7 +572,7 @@ __cache_pool_adjust(WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *entry; uint64_t adjustment, highest_percentile, pressure, reserved, smallest; u_int pct_full; - bool busy, pool_full, grow; + bool busy, decrease_ok, grow, pool_full; *adjustedp = false; cp = __wt_process.cache_pool; @@ -611,6 +618,34 @@ __cache_pool_adjust(WT_SESSION_IMPL *session, if (cache->cp_skip_count > 0 && --cache->cp_skip_count > 0) continue; + /* + * The bump threshold decreases as we try longer to balance + * the pool. Adjust how aggressively we free space from + * participants depending on how long we have been trying. + */ + decrease_ok = false; + /* + * Any participant is a candidate if we have been trying + * for long enough. + */ + if (bump_threshold == 0) + decrease_ok = true; + /* + * Participants that aren't doing application eviction and + * are showing a reasonable amount of usage are excluded + * even if we have been trying for a while. + */ + else if (bump_threshold < WT_CACHE_POOL_BUMP_THRESHOLD / 3 && + (!busy && highest > 1)) + decrease_ok = true; + /* + * Any participant that is proportionally less busy is a + * candidate from the first attempt. + */ + else if (highest > 1 && + pressure < WT_CACHE_POOL_REDUCE_THRESHOLD) + decrease_ok = true; + /* * If the entry is currently allocated less than the reserved * size, increase its allocation. 
This should only happen if: @@ -624,17 +659,12 @@ __cache_pool_adjust(WT_SESSION_IMPL *session, * Conditions for reducing the amount of resources for an * entry: * - the pool is full, - * - application threads are not busy doing eviction already, * - this entry has more than the minimum amount of space in * use, - * - the read pressure in this entry is below the threshold, - * other entries need more cache, the entry has more than - * the minimum space and there is no available space in the - * pool. + * - it was determined that this slot is a good candidate */ - } else if (pool_full && !busy && - entry->cache_size > reserved && - pressure < WT_CACHE_POOL_REDUCE_THRESHOLD && highest > 1) { + } else if (pool_full && + entry->cache_size > reserved && decrease_ok) { grow = false; /* * Don't drop the size down too much - or it can -- cgit v1.2.1 From c23fa74a5fcefd751532ed0357ee0b237d487ab2 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Mon, 20 Feb 2017 11:02:13 +1100 Subject: WT-3189 Fix a segfault in eviction random page search. (#3308) A NULL page could be encountered when traversing a tree that is being used by exclusive access. --- src/btree/bt_random.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/btree/bt_random.c b/src/btree/bt_random.c index 44de511f787..4c7ff861d26 100644 --- a/src/btree/bt_random.c +++ b/src/btree/bt_random.c @@ -201,6 +201,16 @@ restart: /* current = &btree->root; for (;;) { page = current->page; + /* + * When walking a tree for eviction, an exclusive operation may + * be in progress leaving the root page is not valid. Just give + * up in that case. + */ + if (page == NULL) { + WT_ASSERT(session, eviction); + break; + } + if (!WT_PAGE_IS_INTERNAL(page)) break; -- cgit v1.2.1 From acceacbab536b64d52a1f9ef2e6cbdd54a1996ef Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Mon, 20 Feb 2017 11:04:56 +1100 Subject: WT-3149 Use a range of eviction walk start points. 
(#3305) Choosing a random point isn't very efficient in append only workloads. --- src/evict/evict_lru.c | 51 ++++++++++++++++++++++++++++++++------------------- src/include/btree.h | 6 +++++- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 42fe4d4608e..07cf8542c53 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1654,31 +1654,36 @@ __evict_walk_file(WT_SESSION_IMPL *session, !F_ISSET(cache, WT_CACHE_EVICT_CLEAN)) min_pages *= 10; + walk_flags = + WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT; + /* * Choose a random point in the tree if looking for candidates in a * tree with no starting point set. This is mostly aimed at ensuring * eviction fairly visits all pages in trees with a lot of in-cache * content. */ - if (btree->evict_ref == NULL) { - /* Ensure internal pages indexes remain valid for our walk */ - WT_WITH_PAGE_INDEX(session, ret = - __wt_random_descent(session, &btree->evict_ref, true)); - WT_RET_NOTFOUND_OK(ret); - - /* - * Reverse the direction of the walk each time we start at a - * random point so both ends of the tree are equally likely to - * be visited. 
- */ - btree->evict_walk_reverse = !btree->evict_walk_reverse; - } - - walk_flags = - WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT; - - if (btree->evict_walk_reverse) + switch (btree->evict_walk_state) { + case WT_EVICT_WALK_NEXT: + break; + case WT_EVICT_WALK_PREV: FLD_SET(walk_flags, WT_READ_PREV); + break; + case WT_EVICT_WALK_RAND_PREV: + FLD_SET(walk_flags, WT_READ_PREV); + /* FALLTHROUGH */ + case WT_EVICT_WALK_RAND_NEXT: + if (btree->evict_ref == NULL) { + /* Ensure internal pages indexes remain valid */ + WT_WITH_PAGE_INDEX(session, ret = __wt_random_descent( + session, &btree->evict_ref, true)); + WT_RET_NOTFOUND_OK(ret); + } + break; + default: + WT_RET_MSG(session, EINVAL, + "Invalid btree walk state encountered"); + } /* * Get some more eviction candidate pages, starting at the last saved @@ -1713,8 +1718,16 @@ __evict_walk_file(WT_SESSION_IMPL *session, pages_seen > min_pages && (pages_queued == 0 || (pages_seen / pages_queued) > (min_pages / target_pages)); - if (give_up) + if (give_up) { + /* + * Try a different walk start point next time if a + * walk gave up. 
+ */ + btree->evict_walk_state = + (btree->evict_walk_state + 1) % + WT_EVICT_WALK_MAX_LEGAL_VALUE; break; + } if (ref == NULL) { if (++restarts == 2) diff --git a/src/include/btree.h b/src/include/btree.h index d742310bf8f..976c1d2110c 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -141,7 +141,11 @@ struct __wt_btree { u_int evict_walk_skips; /* Number of walks skipped */ u_int evict_disabled; /* Eviction disabled count */ volatile uint32_t evict_busy; /* Count of threads in eviction */ - bool evict_walk_reverse; /* Walk direction */ + enum { + WT_EVICT_WALK_NEXT, WT_EVICT_WALK_PREV, + WT_EVICT_WALK_RAND_NEXT, WT_EVICT_WALK_RAND_PREV + } evict_walk_state; /* Eviction walk state */ +#define WT_EVICT_WALK_MAX_LEGAL_VALUE WT_EVICT_WALK_RAND_PREV + 1 enum { WT_CKPT_OFF, WT_CKPT_PREPARE, WT_CKPT_RUNNING -- cgit v1.2.1 From e7b2a53c33271598c9041eec8363c95ff37daa58 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Mon, 20 Feb 2017 15:17:24 +1100 Subject: WT-3149 Fix a compiler warning on OS X. I guess I shouldn't try to future proof. (#3309) src/evict/evict_lru.c:1683:2: error: default label in switch which covers all enumeration values [-Werror,-Wcovered-switch-default] --- src/evict/evict_lru.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 07cf8542c53..f1949a7c320 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1680,9 +1680,6 @@ __evict_walk_file(WT_SESSION_IMPL *session, WT_RET_NOTFOUND_OK(ret); } break; - default: - WT_RET_MSG(session, EINVAL, - "Invalid btree walk state encountered"); } /* -- cgit v1.2.1 From 1aaf7b2d54886e4d323f05dfa6e08d86d614ee1c Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 21 Feb 2017 07:08:11 -0500 Subject: WT-3191 lint (#3310) * WT-3191 lint Remove WT_UNUSED(session), session is used in the function. * Check returns from WT_CURSOR.get_value(). 
* Lots of the csuite test programs have "normal" output now, change the testutil_die() function to include a "FAILED" message so it's possible to figure it out. Make the program name a global so we can print it out on error, add a standard testutil_set_progname function to set the program name and call it from everywhere. * Lint is deeply saddened by mixing-and-matching enums and ints, use a switch statement instead of arithmetic operations. * Avoid enum arithmetic with minimal casting. This change only uses the enum for one switch. --- src/evict/evict_lru.c | 8 ++++---- src/include/btree.h | 27 ++++++++++++--------------- src/include/cache.h | 9 +++++++++ src/log/log_slot.c | 1 - test/bloom/test_bloom.c | 13 ++++--------- test/checkpoint/test_checkpoint.c | 17 +++++++---------- test/checkpoint/test_checkpoint.h | 1 - test/csuite/wt3184_dup_index_collator/main.c | 4 ++-- test/cursor_order/cursor_order.c | 6 +----- test/fops/t.c | 12 ++---------- test/format/config.c | 12 ++++++------ test/format/format.h | 2 -- test/format/ops.c | 4 ++-- test/format/t.c | 15 ++++----------- test/format/wts.c | 2 +- test/huge/huge.c | 7 +------ test/manydbs/manydbs.c | 7 ++----- test/readonly/readonly.c | 7 ++----- test/recovery/random-abort.c | 7 ++----- test/recovery/truncated-log.c | 6 +----- test/salvage/salvage.c | 7 +------ test/thread/t.c | 6 +----- test/utility/misc.c | 17 +++++++++++++++++ test/utility/parse_opts.c | 5 +---- test/utility/test_util.h | 5 ++++- 25 files changed, 86 insertions(+), 121 deletions(-) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index f1949a7c320..f07a823ff57 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1663,7 +1663,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, * eviction fairly visits all pages in trees with a lot of in-cache * content. 
*/ - switch (btree->evict_walk_state) { + switch ((WT_EVICT_WALK_START)btree->evict_start_type) { case WT_EVICT_WALK_NEXT: break; case WT_EVICT_WALK_PREV: @@ -1720,9 +1720,9 @@ __evict_walk_file(WT_SESSION_IMPL *session, * Try a different walk start point next time if a * walk gave up. */ - btree->evict_walk_state = - (btree->evict_walk_state + 1) % - WT_EVICT_WALK_MAX_LEGAL_VALUE; + btree->evict_start_type = + (btree->evict_start_type + 1) % + WT_EVICT_WALK_START_NUM; break; } diff --git a/src/include/btree.h b/src/include/btree.h index 976c1d2110c..2aa0e470f59 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -130,22 +130,19 @@ struct __wt_btree { uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ uint64_t write_gen; /* Write generation */ - uint64_t bytes_inmem; /* Cache bytes in memory. */ - uint64_t bytes_dirty_intl; /* Bytes in dirty internal pages. */ - uint64_t bytes_dirty_leaf; /* Bytes in dirty leaf pages. */ - - WT_REF *evict_ref; /* Eviction thread's location */ - uint64_t evict_priority; /* Relative priority of cached pages */ - u_int evict_walk_period; /* Skip this many LRU walks */ - u_int evict_walk_saved; /* Saved walk skips for checkpoints */ - u_int evict_walk_skips; /* Number of walks skipped */ - u_int evict_disabled; /* Eviction disabled count */ + uint64_t bytes_inmem; /* Cache bytes in memory. */ + uint64_t bytes_dirty_intl; /* Bytes in dirty internal pages. */ + uint64_t bytes_dirty_leaf; /* Bytes in dirty leaf pages. 
*/ + + WT_REF *evict_ref; /* Eviction thread's location */ + uint64_t evict_priority; /* Relative priority of cached pages */ + u_int evict_walk_period; /* Skip this many LRU walks */ + u_int evict_walk_saved; /* Saved walk skips for checkpoints */ + u_int evict_walk_skips; /* Number of walks skipped */ + u_int evict_disabled; /* Eviction disabled count */ volatile uint32_t evict_busy; /* Count of threads in eviction */ - enum { - WT_EVICT_WALK_NEXT, WT_EVICT_WALK_PREV, - WT_EVICT_WALK_RAND_NEXT, WT_EVICT_WALK_RAND_PREV - } evict_walk_state; /* Eviction walk state */ -#define WT_EVICT_WALK_MAX_LEGAL_VALUE WT_EVICT_WALK_RAND_PREV + 1 + int evict_start_type; /* Start position for eviction walk + (see WT_EVICT_WALK_START). */ enum { WT_CKPT_OFF, WT_CKPT_PREPARE, WT_CKPT_RUNNING diff --git a/src/include/cache.h b/src/include/cache.h index abd5a1901f7..04920c3585a 100644 --- a/src/include/cache.h +++ b/src/include/cache.h @@ -18,6 +18,15 @@ #define WT_EVICT_MAX_TREES 1000 /* Maximum walk points */ +/* Ways to position when starting an eviction walk. */ +typedef enum { + WT_EVICT_WALK_NEXT, + WT_EVICT_WALK_PREV, + WT_EVICT_WALK_RAND_NEXT, + WT_EVICT_WALK_RAND_PREV +} WT_EVICT_WALK_START; +#define WT_EVICT_WALK_START_NUM (WT_EVICT_WALK_RAND_PREV + 1) + /* * WT_EVICT_ENTRY -- * Encapsulation of an eviction candidate. 
diff --git a/src/log/log_slot.c b/src/log/log_slot.c index b4655ff6c1a..c685aec3ffc 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -574,7 +574,6 @@ __wt_log_slot_release(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, int64_t size) wt_off_t cur_offset, my_start; int64_t my_size, rel_size; - WT_UNUSED(session); slot = myslot->slot; my_start = slot->slot_start_offset + myslot->offset; /* diff --git a/test/bloom/test_bloom.c b/test/bloom/test_bloom.c index 67249ff887e..bef509e01d8 100644 --- a/test/bloom/test_bloom.c +++ b/test/bloom/test_bloom.c @@ -29,8 +29,6 @@ #include "test_util.h" static struct { - char *progname; /* Program name */ - WT_CONNECTION *wt_conn; /* WT_CONNECTION handle */ WT_SESSION *wt_session; /* WT_SESSION handle */ @@ -61,10 +59,7 @@ main(int argc, char *argv[]) { int ch; - if ((g.progname = strrchr(argv[0], DIR_DELIM)) == NULL) - g.progname = argv[0]; - else - ++g.progname; + (void)testutil_set_progname(argv); /* Set default configuration values. */ g.c_cache = 10; @@ -75,7 +70,7 @@ main(int argc, char *argv[]) g.c_srand = 3233456; /* Set values from the command line. */ - while ((ch = __wt_getopt(g.progname, argc, argv, "c:f:k:o:s:")) != EOF) + while ((ch = __wt_getopt(progname, argc, argv, "c:f:k:o:s:")) != EOF) switch (ch) { case 'c': /* Cache size */ g.c_cache = (u_int)atoi(__wt_optarg); @@ -128,7 +123,7 @@ setup(void) */ snprintf(config, sizeof(config), "create,error_prefix=\"%s\",cache_size=%" PRIu32 "MB,%s", - g.progname, g.c_cache, g.config_open == NULL ? "" : g.config_open); + progname, g.c_cache, g.config_open == NULL ? 
"" : g.config_open); testutil_check(wiredtiger_open(NULL, NULL, config, &conn)); @@ -246,7 +241,7 @@ populate_entries(void) void usage(void) { - fprintf(stderr, "usage: %s [-cfkos]\n", g.progname); + fprintf(stderr, "usage: %s [-cfkos]\n", progname); fprintf(stderr, "%s", "\t-c cache size\n" "\t-f number of bits per item\n" diff --git a/test/checkpoint/test_checkpoint.c b/test/checkpoint/test_checkpoint.c index 4998019ad8e..c7132b433d2 100644 --- a/test/checkpoint/test_checkpoint.c +++ b/test/checkpoint/test_checkpoint.c @@ -50,10 +50,7 @@ main(int argc, char *argv[]) char *working_dir; const char *config_open; - if ((g.progname = strrchr(argv[0], DIR_DELIM)) == NULL) - g.progname = argv[0]; - else - ++g.progname; + (void)testutil_set_progname(argv); config_open = NULL; ret = 0; @@ -68,7 +65,7 @@ main(int argc, char *argv[]) runs = 1; while ((ch = __wt_getopt( - g.progname, argc, argv, "c:C:h:k:l:n:r:t:T:W:")) != EOF) + progname, argc, argv, "c:C:h:k:l:n:r:t:T:W:")) != EOF) switch (ch) { case 'c': g.checkpoint_name = __wt_optarg; @@ -132,7 +129,7 @@ main(int argc, char *argv[]) testutil_work_dir_from_path(g.home, 512, working_dir); - printf("%s: process %" PRIu64 "\n", g.progname, (uint64_t)getpid()); + printf("%s: process %" PRIu64 "\n", progname, (uint64_t)getpid()); for (cnt = 1; (runs == 0 || cnt <= runs) && g.status == 0; ++cnt) { printf(" %d: %d workers, %d tables\n", cnt, g.nworkers, g.ntables); @@ -204,7 +201,7 @@ wt_connect(const char *config_open) snprintf(config, sizeof(config), "create,statistics=(fast),error_prefix=\"%s\",cache_size=1GB%s%s", - g.progname, + progname, config_open == NULL ? "" : ",", config_open == NULL ? 
"" : config_open); @@ -297,10 +294,10 @@ log_print_err(const char *m, int e, int fatal) g.running = 0; g.status = e; } - fprintf(stderr, "%s: %s: %s\n", g.progname, m, wiredtiger_strerror(e)); + fprintf(stderr, "%s: %s: %s\n", progname, m, wiredtiger_strerror(e)); if (g.logfp != NULL) fprintf(g.logfp, "%s: %s: %s\n", - g.progname, m, wiredtiger_strerror(e)); + progname, m, wiredtiger_strerror(e)); return (e); } @@ -333,7 +330,7 @@ usage(void) "usage: %s " "[-S] [-C wiredtiger-config] [-k keys] [-l log]\n\t" "[-n ops] [-c checkpoint] [-r runs] [-t f|r|v] [-W workers]\n", - g.progname); + progname); fprintf(stderr, "%s", "\t-C specify wiredtiger_open configuration arguments\n" "\t-c checkpoint name to used named checkpoints\n" diff --git a/test/checkpoint/test_checkpoint.h b/test/checkpoint/test_checkpoint.h index 0d0d02447d5..347bd2c6e89 100644 --- a/test/checkpoint/test_checkpoint.h +++ b/test/checkpoint/test_checkpoint.h @@ -58,7 +58,6 @@ typedef struct { u_int nkeys; /* Keys to load */ u_int nops; /* Operations per thread */ FILE *logfp; /* Message log file. 
*/ - char *progname; /* Program name */ int nworkers; /* Number workers configured */ int ntables; /* Number tables configured */ int ntables_created; /* Number tables opened */ diff --git a/test/csuite/wt3184_dup_index_collator/main.c b/test/csuite/wt3184_dup_index_collator/main.c index bcefd2f1a3b..c969e7a1d7e 100644 --- a/test/csuite/wt3184_dup_index_collator/main.c +++ b/test/csuite/wt3184_dup_index_collator/main.c @@ -157,9 +157,9 @@ main(int argc, char *argv[]) printf("duplicating cursor\n"); testutil_check(session->open_cursor(session, NULL, cursor, NULL, &cursor1)); - cursor->get_value(cursor, &got); + testutil_check(cursor->get_value(cursor, &got)); testutil_assert(item_to_int(&got) == 17); - cursor1->get_value(cursor1, &got); + testutil_check(cursor1->get_value(cursor1, &got)); testutil_assert(item_to_int(&got) == 17); testutil_check(session->close(session, NULL)); diff --git a/test/cursor_order/cursor_order.c b/test/cursor_order/cursor_order.c index 85b8c68e545..62777f552bf 100644 --- a/test/cursor_order/cursor_order.c +++ b/test/cursor_order/cursor_order.c @@ -29,7 +29,6 @@ #include "cursor_order.h" static char home[512]; /* Program working dir */ -static char *progname; /* Program name */ static FILE *logfp; /* Log file */ static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *); @@ -51,10 +50,7 @@ main(int argc, char *argv[]) int ch, cnt, runs; char *config_open, *working_dir; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); cfg = &_cfg; config_open = NULL; diff --git a/test/fops/t.c b/test/fops/t.c index 7b4a7cf8fca..651d22c8deb 100644 --- a/test/fops/t.c +++ b/test/fops/t.c @@ -34,7 +34,6 @@ u_int nops; /* Operations */ const char *uri; /* Object */ const char *config; /* Object config */ -static char *progname; /* Program name */ static FILE *logfp; /* Log file */ static char home[512]; @@ -71,22 +70,15 @@ main(int argc, char *argv[]) int ch, 
cnt, ret, runs; char *config_open, *working_dir; - working_dir = NULL; - - /* Remove directories */ - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); if ((ret = pthread_rwlock_init(&single, NULL)) != 0) testutil_die(ret, "pthread_rwlock_init: single"); - config_open = NULL; nops = 1000; nthreads = 10; runs = 1; - + config_open = working_dir = NULL; while ((ch = __wt_getopt(progname, argc, argv, "C:h:l:n:r:t:")) != EOF) switch (ch) { case 'C': /* wiredtiger_open config */ diff --git a/test/format/config.c b/test/format/config.c index 50430fe073e..958ad6b7a99 100644 --- a/test/format/config.c +++ b/test/format/config.c @@ -104,7 +104,7 @@ config_setup(void) if (DATASOURCE("lsm") && g.type != ROW) { fprintf(stderr, "%s: lsm data_source is only compatible with row file_type\n", - g.progname); + progname); exit(EXIT_FAILURE); } @@ -681,7 +681,7 @@ config_single(const char *s, int perm) if ((ep = strchr(s, '=')) == NULL) { fprintf(stderr, - "%s: %s: illegal configuration value\n", g.progname, s); + "%s: %s: illegal configuration value\n", progname, s); exit(EXIT_FAILURE); } @@ -751,20 +751,20 @@ config_single(const char *s, int perm) v = strtol(ep, &p, 10); if (*p != '\0') { fprintf(stderr, "%s: %s: illegal numeric value\n", - g.progname, s); + progname, s); exit(EXIT_FAILURE); } } if (F_ISSET(cp, C_BOOL)) { if (v != 0 && v != 1) { fprintf(stderr, "%s: %s: value of boolean not 0 or 1\n", - g.progname, s); + progname, s); exit(EXIT_FAILURE); } } else if (v < cp->min || v > cp->maxset) { fprintf(stderr, "%s: %s: value outside min/max values of %" PRIu32 "-%" PRIu32 "\n", - g.progname, s, cp->min, cp->maxset); + progname, s, cp->min, cp->maxset); exit(EXIT_FAILURE); } *cp->v = (uint32_t)v; @@ -883,7 +883,7 @@ config_find(const char *s, size_t len) return (cp); fprintf(stderr, - "%s: %s: unknown configuration keyword\n", g.progname, s); + "%s: %s: unknown configuration keyword\n", progname, 
s); config_error(); exit(EXIT_FAILURE); } diff --git a/test/format/format.h b/test/format/format.h index 6bb44410acc..41cc48c4278 100644 --- a/test/format/format.h +++ b/test/format/format.h @@ -79,8 +79,6 @@ #define FORMAT_OPERATION_REPS 3 /* 3 thread operations sets */ typedef struct { - char *progname; /* Program name */ - char *home; /* Home directory */ char *home_backup; /* Hot-backup directory */ char *home_backup_init; /* Initialize backup command */ diff --git a/test/format/ops.c b/test/format/ops.c index 940318c87a9..1013d1da30b 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -1448,7 +1448,7 @@ notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno) return (1); if (bdb_notfound) { - fprintf(stderr, "%s: %s:", g.progname, f); + fprintf(stderr, "%s: %s:", progname, f); if (keyno != 0) fprintf(stderr, " row %" PRIu64 ":", keyno); fprintf(stderr, @@ -1456,7 +1456,7 @@ notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno) testutil_die(0, NULL); } if (wt_ret == WT_NOTFOUND) { - fprintf(stderr, "%s: %s:", g.progname, f); + fprintf(stderr, "%s: %s:", progname, f); if (keyno != 0) fprintf(stderr, " row %" PRIu64 ":", keyno); fprintf(stderr, diff --git a/test/format/t.c b/test/format/t.c index 7701595776c..c6686ae8b91 100644 --- a/test/format/t.c +++ b/test/format/t.c @@ -49,14 +49,7 @@ main(int argc, char *argv[]) config = NULL; -#ifdef _WIN32 - g.progname = "t_format.exe"; -#else - if ((g.progname = strrchr(argv[0], DIR_DELIM)) == NULL) - g.progname = argv[0]; - else - ++g.progname; -#endif + (void)testutil_set_progname(argv); #if 0 /* Configure the GNU malloc for debugging. 
*/ @@ -74,7 +67,7 @@ main(int argc, char *argv[]) home = NULL; onerun = 0; while ((ch = __wt_getopt( - g.progname, argc, argv, "1C:c:H:h:Llqrt:")) != EOF) + progname, argc, argv, "1C:c:H:h:Llqrt:")) != EOF) switch (ch) { case '1': /* One run */ onerun = 1; @@ -179,7 +172,7 @@ main(int argc, char *argv[]) testutil_check(pthread_rwlock_init(&g.checkpoint_lock, NULL)); testutil_check(pthread_rwlock_init(&g.death_lock, NULL)); - printf("%s: process %" PRIdMAX "\n", g.progname, (intmax_t)getpid()); + printf("%s: process %" PRIdMAX "\n", progname, (intmax_t)getpid()); while (++g.run_cnt <= g.c_runs || g.c_runs == 0 ) { startup(); /* Start a run */ @@ -344,7 +337,7 @@ usage(void) "usage: %s [-1Llqr] [-C wiredtiger-config]\n " "[-c config-file] [-H mount] [-h home] " "[name=value ...]\n", - g.progname); + progname); fprintf(stderr, "%s", "\t-1 run once\n" "\t-C specify wiredtiger_open configuration arguments\n" diff --git a/test/format/wts.c b/test/format/wts.c index da234ce53c7..a87aa5b9f88 100644 --- a/test/format/wts.c +++ b/test/format/wts.c @@ -144,7 +144,7 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) "cache_size=%" PRIu32 "MB," "checkpoint_sync=false," "error_prefix=\"%s\"", - g.c_cache, g.progname); + g.c_cache, progname); /* In-memory configuration. 
*/ if (g.c_in_memory != 0) diff --git a/test/huge/huge.c b/test/huge/huge.c index 17e2db353d5..2b0d5f498e3 100644 --- a/test/huge/huge.c +++ b/test/huge/huge.c @@ -29,7 +29,6 @@ #include "test_util.h" static char home[512]; /* Program working dir */ -static const char *progname; /* Program name */ static uint8_t *big; /* Big key/value buffer */ #define GIGABYTE (1073741824) @@ -167,14 +166,10 @@ main(int argc, char *argv[]) int ch, small; char *working_dir; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); small = 0; working_dir = NULL; - while ((ch = __wt_getopt(progname, argc, argv, "h:s")) != EOF) switch (ch) { case 'h': diff --git a/test/manydbs/manydbs.c b/test/manydbs/manydbs.c index 7e986d47af3..345c470ba90 100644 --- a/test/manydbs/manydbs.c +++ b/test/manydbs/manydbs.c @@ -32,7 +32,6 @@ #define HOME_BASE "WT_TEST" static char home[HOME_SIZE]; /* Base home directory */ static char hometmp[HOME_SIZE]; /* Each conn home directory */ -static const char *progname; /* Program name */ static const char * const uri = "table:main"; #define WTOPEN_CFG_COMMON \ @@ -129,10 +128,8 @@ main(int argc, char *argv[]) const char *working_dir, *wt_cfg; char cmd[128]; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); + dbs = MAX_DBS; working_dir = HOME_BASE; idle = false; diff --git a/test/readonly/readonly.c b/test/readonly/readonly.c index a4b79f5859f..746aecbf6c5 100644 --- a/test/readonly/readonly.c +++ b/test/readonly/readonly.c @@ -39,7 +39,6 @@ static char home_rd[HOME_SIZE + sizeof(HOME_RD_SUFFIX)]; #define HOME_RD2_SUFFIX ".RDNOLOCK" /* Read-only dir no lock file */ static char home_rd2[HOME_SIZE + sizeof(HOME_RD2_SUFFIX)]; -static const char *progname; /* Program name */ static const char *saved_argv0; /* Program command */ static const char * const uri = "table:main"; @@ -172,10 +171,8 @@ main(int 
argc, char *argv[]) char cmd[512]; uint8_t buf[MAX_VAL]; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); + /* * Needed unaltered for system command later. */ diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c index 660ef0cca67..1d6599ce1b3 100644 --- a/test/recovery/random-abort.c +++ b/test/recovery/random-abort.c @@ -32,7 +32,7 @@ #include static char home[1024]; /* Program working dir */ -static const char *progname; /* Program name */ + /* * These two names for the URI and file system must be maintained in tandem. */ @@ -229,10 +229,7 @@ main(int argc, char *argv[]) const char *working_dir; char fname[64], kname[64], statname[1024]; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); inmem = false; nth = MIN_TH; diff --git a/test/recovery/truncated-log.c b/test/recovery/truncated-log.c index 6a142b8e710..1f0a0f7a7bd 100644 --- a/test/recovery/truncated-log.c +++ b/test/recovery/truncated-log.c @@ -36,7 +36,6 @@ #endif static char home[1024]; /* Program working dir */ -static const char *progname; /* Program name */ static const char * const uri = "table:main"; #define RECORDS_FILE "records" @@ -271,10 +270,7 @@ main(int argc, char *argv[]) pid_t pid; const char *working_dir; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); working_dir = "WT_TEST.truncated-log"; while ((ch = __wt_getopt(progname, argc, argv, "h:")) != EOF) diff --git a/test/salvage/salvage.c b/test/salvage/salvage.c index b8553bbd72d..942f7faba03 100644 --- a/test/salvage/salvage.c +++ b/test/salvage/salvage.c @@ -54,8 +54,6 @@ void run(int); void t(int, u_int, int); int usage(void); -static const char *progname; /* Program name */ - static FILE *res_fp; /* Results file */ static u_int page_type; /* File types */ static 
int value_unique; /* Values are unique */ @@ -70,10 +68,7 @@ main(int argc, char *argv[]) u_int ptype; int ch, r; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); r = 0; ptype = 0; diff --git a/test/thread/t.c b/test/thread/t.c index baadbf2adb9..9dfd02bdad2 100644 --- a/test/thread/t.c +++ b/test/thread/t.c @@ -37,7 +37,6 @@ int multiple_files; /* File per thread */ int session_per_op; /* New session per operation */ static char home[512]; /* Program working dir */ -static char *progname; /* Program name */ static FILE *logfp; /* Log file */ static int handle_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *); @@ -59,10 +58,7 @@ main(int argc, char *argv[]) int ch, cnt, runs; char *config_open, *working_dir; - if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) - progname = argv[0]; - else - ++progname; + (void)testutil_set_progname(argv); config_open = NULL; working_dir = NULL; diff --git a/test/utility/misc.c b/test/utility/misc.c index 1ba08ddd77f..8aee9d16f66 100644 --- a/test/utility/misc.c +++ b/test/utility/misc.c @@ -28,6 +28,7 @@ #include "test_util.h" void (*custom_die)(void) = NULL; +const char *progname = "program name not set"; /* * die -- @@ -42,7 +43,9 @@ testutil_die(int e, const char *fmt, ...) if (custom_die != NULL) (*custom_die)(); + fprintf(stderr, "%s: FAILED", progname); if (fmt != NULL) { + fprintf(stderr, ": "); va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); @@ -54,6 +57,20 @@ testutil_die(int e, const char *fmt, ...) exit(EXIT_FAILURE); } +/* + * testutil_set_progname -- + * Set the global program name for error handling. + */ +const char * +testutil_set_progname(char * const *argv) +{ + if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) + progname = argv[0]; + else + ++progname; + return (progname); +} + /* * testutil_work_dir_from_path -- * Takes a buffer, its size and the intended work directory. 
diff --git a/test/utility/parse_opts.c b/test/utility/parse_opts.c index 74a1c021d5d..af9256b199a 100644 --- a/test/utility/parse_opts.c +++ b/test/utility/parse_opts.c @@ -43,10 +43,7 @@ testutil_parse_opts(int argc, char * const *argv, TEST_OPTS *opts) opts->running = true; opts->verbose = false; - if ((opts->progname = strrchr(argv[0], DIR_DELIM)) == NULL) - opts->progname = argv[0]; - else - ++opts->progname; + opts->progname = testutil_set_progname(argv); while ((ch = __wt_getopt(opts->progname, argc, argv, "A:h:n:o:pR:T:t:vW:")) != EOF) diff --git a/test/utility/test_util.h b/test/utility/test_util.h index 489bbe18d87..34829d06f6b 100644 --- a/test/utility/test_util.h +++ b/test/utility/test_util.h @@ -48,7 +48,7 @@ /* Generic option parsing structure shared by all test cases. */ typedef struct { char *home; - char *progname; + const char *progname; enum { TABLE_COL=1, /* Fixed-length column store */ TABLE_FIX=2, /* Variable-length column store */ TABLE_ROW=3 /* Row-store */ @@ -192,3 +192,6 @@ void testutil_work_dir_from_path(char *, size_t, const char *); void *thread_append(void *); void *thread_insert_append(void *); void *thread_prev(void *); + +extern const char *progname; +const char *testutil_set_progname(char * const *); -- cgit v1.2.1 From 774c4c208850622f1d908ff0b08bd812b459f59e Mon Sep 17 00:00:00 2001 From: Sasha Fedorova Date: Wed, 22 Feb 2017 22:14:45 +0000 Subject: Reduced the time that the eviction server has to wait for every data point measuring the eviction rate when it is making the tuning decisions. I observed performance improvements across the board from this change. 
--- src/evict/evict_lru.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index f07a823ff57..d16594b0816 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -921,7 +921,7 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) #define EVICT_TUNE_DATAPT_MIN 3 /* Data points needed before deciding if we should keep adding workers or settle on an earlier value. */ -#define EVICT_TUNE_PERIOD 2 /* Tune period in seconds */ +#define EVICT_TUNE_PERIOD 1 /* Tune period in seconds */ /* * __evict_tune_workers -- -- cgit v1.2.1 From e4146723a89a5f318515f8ee2b662a4bb7d0b919 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 27 Feb 2017 19:16:41 -0500 Subject: WT-3182 Switch make-check to run the short test suite by default (#3313) --- test/checkpoint/smoke.sh | 4 ++-- test/csuite/wt2246_col_append/main.c | 5 +++-- test/csuite/wt2323_join_visibility/main.c | 5 +++-- test/csuite/wt2535_insert_race/main.c | 5 +++-- test/csuite/wt2834_join_bloom_fix/main.c | 6 +++--- test/csuite/wt2853_perf/main.c | 6 +++--- test/csuite/wt2909_checkpoint_integrity/main.c | 26 ++++++++++++-------------- test/utility/misc.c | 15 ++++++++++----- test/utility/test_util.h | 2 +- 9 files changed, 40 insertions(+), 34 deletions(-) diff --git a/test/checkpoint/smoke.sh b/test/checkpoint/smoke.sh index 123d4e00df5..39b1f428c2c 100755 --- a/test/checkpoint/smoke.sh +++ b/test/checkpoint/smoke.sh @@ -6,8 +6,8 @@ set -e echo "checkpoint: 3 mixed tables" $TEST_WRAPPER ./t -T 3 -t m -# We are done if short tests are requested -test -z "$TESTUTIL_DISABLE_LONG_TESTS" || exit 0 +# We are done unless long tests are enabled. 
+test "$TESTUTIL_ENABLE_LONG_TESTS" = "1" || exit 0 echo "checkpoint: 6 column-store tables" $TEST_WRAPPER ./t -T 6 -t c diff --git a/test/csuite/wt2246_col_append/main.c b/test/csuite/wt2246_col_append/main.c index 4b352b26051..976e2269da6 100644 --- a/test/csuite/wt2246_col_append/main.c +++ b/test/csuite/wt2246_col_append/main.c @@ -101,9 +101,10 @@ main(int argc, char *argv[]) uint64_t i, id; char buf[100]; + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; - if (testutil_disable_long_tests()) - return (0); memset(opts, 0, sizeof(*opts)); opts->table_type = TABLE_ROW; opts->n_append_threads = N_APPEND_THREADS; diff --git a/test/csuite/wt2323_join_visibility/main.c b/test/csuite/wt2323_join_visibility/main.c index 239a3f300d0..a61f707e008 100644 --- a/test/csuite/wt2323_join_visibility/main.c +++ b/test/csuite/wt2323_join_visibility/main.c @@ -92,10 +92,11 @@ main(int argc, char *argv[]) TEST_OPTS *opts, _opts; const char *tablename; + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; sharedopts = &_sharedopts; - if (testutil_disable_long_tests()) - return (0); memset(opts, 0, sizeof(*opts)); memset(sharedopts, 0, sizeof(*sharedopts)); diff --git a/test/csuite/wt2535_insert_race/main.c b/test/csuite/wt2535_insert_race/main.c index ae18760a829..ba17d485e07 100644 --- a/test/csuite/wt2535_insert_race/main.c +++ b/test/csuite/wt2535_insert_race/main.c @@ -49,9 +49,10 @@ main(int argc, char *argv[]) uint64_t current_value; int i; + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; - if (testutil_disable_long_tests()) - return (0); memset(opts, 0, sizeof(*opts)); opts->nthreads = 10; opts->nrecords = 1000; diff --git a/test/csuite/wt2834_join_bloom_fix/main.c b/test/csuite/wt2834_join_bloom_fix/main.c index 7c80496f1b6..f2c54b942be 100644 --- a/test/csuite/wt2834_join_bloom_fix/main.c +++ 
b/test/csuite/wt2834_join_bloom_fix/main.c @@ -59,11 +59,11 @@ main(int argc, char *argv[]) char flaguri[256]; char joinuri[256]; + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; - if (testutil_disable_long_tests()) - return (0); memset(opts, 0, sizeof(*opts)); - testutil_check(testutil_parse_opts(argc, argv, opts)); testutil_make_work_dir(opts->home); diff --git a/test/csuite/wt2853_perf/main.c b/test/csuite/wt2853_perf/main.c index 6cec9634cd1..b365b03493a 100644 --- a/test/csuite/wt2853_perf/main.c +++ b/test/csuite/wt2853_perf/main.c @@ -82,11 +82,11 @@ main(int argc, char *argv[]) int i, nfail; const char *tablename; + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; sharedopts = &_sharedopts; - - if (testutil_disable_long_tests()) - return (0); memset(opts, 0, sizeof(*opts)); memset(sharedopts, 0, sizeof(*sharedopts)); memset(insert_args, 0, sizeof(insert_args)); diff --git a/test/csuite/wt2909_checkpoint_integrity/main.c b/test/csuite/wt2909_checkpoint_integrity/main.c index ddf249fb406..0ae81543050 100644 --- a/test/csuite/wt2909_checkpoint_integrity/main.c +++ b/test/csuite/wt2909_checkpoint_integrity/main.c @@ -96,9 +96,8 @@ static void run_check_subtest(TEST_OPTS *, const char *, uint64_t, bool, uint64_t *); static void run_check_subtest_range(TEST_OPTS *, const char *, bool); static int run_process(TEST_OPTS *, const char *, char *[], int *); -static int subtest_main(int, char *[], bool); +static void subtest_main(int, char *[], bool); static void subtest_populate(TEST_OPTS *, bool); -int main(int, char *[]); extern int __wt_optind; @@ -446,7 +445,7 @@ run_process(TEST_OPTS *opts, const char *prog, char *argv[], int *status) * subtest_main -- * The main program for the subtest */ -static int +static void subtest_main(int argc, char *argv[], bool close_test) { TEST_OPTS *opts, _opts; @@ -454,8 +453,6 @@ subtest_main(int argc, char 
*argv[], bool close_test) char config[1024], filename[1024]; struct rlimit rlim; - if (testutil_disable_long_tests()) - return (0); opts = &_opts; memset(opts, 0, sizeof(*opts)); memset(&rlim, 0, sizeof(rlim)); @@ -499,8 +496,6 @@ subtest_main(int argc, char *argv[], bool close_test) subtest_populate(opts, close_test); testutil_cleanup(opts); - - return (0); } /* @@ -622,8 +617,9 @@ main(int argc, char *argv[]) uint64_t nresults; const char *debugger; - if (testutil_disable_long_tests()) - return (0); + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + opts = &_opts; memset(opts, 0, sizeof(*opts)); debugger = NULL; @@ -635,11 +631,13 @@ main(int argc, char *argv[]) opts->nrecords = 50000; while (argc > 0) { - if (strcmp(argv[0], "subtest") == 0) - return (subtest_main(argc, argv, false)); - else if (strcmp(argv[0], "subtest_close") == 0) - return (subtest_main(argc, argv, true)); - else if (strcmp(argv[0], "gdb") == 0) + if (strcmp(argv[0], "subtest") == 0) { + subtest_main(argc, argv, false); + return (0); + } else if (strcmp(argv[0], "subtest_close") == 0) { + subtest_main(argc, argv, true); + return (0); + } else if (strcmp(argv[0], "gdb") == 0) debugger = "/usr/bin/gdb"; else testutil_assert(false); diff --git a/test/utility/misc.c b/test/utility/misc.c index 8aee9d16f66..61dad3d76c2 100644 --- a/test/utility/misc.c +++ b/test/utility/misc.c @@ -166,20 +166,25 @@ testutil_cleanup(TEST_OPTS *opts) } /* - * testutil_disable_long_tests -- - * Return if TESTUTIL_DISABLE_LONG_TESTS is set. + * testutil_enable_long_tests -- + * Return if TESTUTIL_ENABLE_LONG_TESTS is set. */ bool -testutil_disable_long_tests(void) +testutil_enable_long_tests(void) { const char *res; + bool enable_long_tests; if (__wt_getenv(NULL, - "TESTUTIL_DISABLE_LONG_TESTS", &res) == WT_NOTFOUND) + "TESTUTIL_ENABLE_LONG_TESTS", &res) == WT_NOTFOUND) return (false); + /* Accept anything other than "TESTUTIL_ENABLE_LONG_TESTS=0". 
*/ + enable_long_tests = res[0] != '0'; + free((void *)res); - return (true); + + return (enable_long_tests); } /* diff --git a/test/utility/test_util.h b/test/utility/test_util.h index 34829d06f6b..406ed2c4961 100644 --- a/test/utility/test_util.h +++ b/test/utility/test_util.h @@ -185,7 +185,7 @@ void *dstrdup(const void *); void *dstrndup(const char *, size_t); void testutil_clean_work_dir(const char *); void testutil_cleanup(TEST_OPTS *); -bool testutil_disable_long_tests(void); +bool testutil_enable_long_tests(void); void testutil_make_work_dir(char *); int testutil_parse_opts(int, char * const *, TEST_OPTS *); void testutil_work_dir_from_path(char *, size_t, const char *); -- cgit v1.2.1 From 09e26f73985f3ba023602de7dad9ad036700cf25 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Tue, 28 Feb 2017 15:27:28 +1100 Subject: WT-3182 Update Evergreen configuration to include long make check (#3314) --- test/mciproject.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/mciproject.yml b/test/mciproject.yml index eb74914eb46..6456475aa00 100644 --- a/test/mciproject.yml +++ b/test/mciproject.yml @@ -65,7 +65,7 @@ tasks: ./build_posix/reconf ${configure_env_vars|} ./configure --enable-diagnostic --enable-python --enable-zlib --enable-strict --enable-verbose ${make_command|make} ${smp_command|} 2>&1 - ${make_command|make} VERBOSE=1 check 2>&1 + TESTUTIL_ENABLE_LONG_TESTS=1 ${make_command|make} VERBOSE=1 check 2>&1 fi - command: archive.targz_pack params: -- cgit v1.2.1 From f3747a2625a531e0405fd8f5f256184ca5479197 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 28 Feb 2017 09:09:04 -0500 Subject: WT-3193 Close a race between verify and the eviction server (#3311) * Don't jump out of the loop if we find an invalid root page, return WT_NOTFOUND instead. Jumping out of the loop leads to potentially setting a returned WT_REF and returning 0, which aren't right for __wt_btcur_next_random(). 
For clarity, this change is not part of the bug fix: we should never be in this code unless we're in eviction, but I don't want to return success, either. We weren't always overwriting the returned WT_REF; do so with a NULL so no caller depends on it not being overwritten in some path. Fix a comment. * WT-3189 Fix a segfault in the eviction server random positioning Back out a26e81d: there's an underlying bug here that verify swaps different checkpoint root pages in-and-out of the WT_BTREE.root structure, without locking out eviction, so checking for a NULL WT_BTREE.root.page pointer isn't sufficient. * Add a comment to explain why we're checking for a NULL root page: there are paths that get here without ever reading in a page from the backing file (for example, when discarding lock-only handles). * Verify has to lock out eviction when swapping checkpoint root pages to/from the WT_BTREE.root structure. Fix a bug in error handling where we could leave a checkpoint loaded in the underlying block manager. * If we have to unload the checkpoint, we also may have to empty the cache. * Rather than turning off eviction in the "special" commands (rebalance, salvage, upgrade and verify), after setting the handle open flag and potentially letting eviction access the tree, turn off eviction before returning into the handle code and setting the handle-open flag. Change the WT_BTREE.close code to clear everything in the WT_BTREE structure the btree layer owns. On close, leave just cache and eviction information, and the LSM-primary flag. On open, leave just the operation flags set by the handle-manager. * Remove eviction lockout code from __wt_evict_file(), assert callers have already done so. Change __wt_session_lock_checkpoint() to lock out eviction before calling __wt_evict_file(). * Revert unnecessary/accidental change. 
* The only place we should see empty trees is when evicting handles that never loaded any backing pages, for whatever reason, check for that in __wt_evict_file(). Change the eviction code and the tree walk code to assert they never see handles without a valid root page. * Replace explicit WT_PTRDIFF() calls on structure members with offsetof(). * Rework a comment to clarify the reasons for special WT_BTREE initialization. --- src/btree/bt_cursor.c | 4 +- src/btree/bt_handle.c | 102 +++++++++++++++++++++++++----------------- src/btree/bt_random.c | 12 +---- src/btree/bt_rebalance.c | 15 +------ src/btree/bt_slvg.c | 12 ----- src/btree/bt_vrfy.c | 36 ++++++++++++--- src/btree/bt_walk.c | 11 +++-- src/evict/evict_file.c | 20 ++++++--- src/evict/evict_lru.c | 16 +++++-- src/include/btree.h | 37 ++++++++------- src/include/extern.h | 1 - src/include/session.h | 24 +++++----- src/session/session_api.c | 2 +- src/session/session_dhandle.c | 11 ++--- 14 files changed, 171 insertions(+), 132 deletions(-) diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 5fde2237538..6a48c5f752b 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -512,7 +512,7 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) */ if (btree->bulk_load_ok) { btree->bulk_load_ok = false; - __wt_btree_evictable(session, true); + __wt_evict_file_exclusive_off(session); } retry: WT_RET(__cursor_func_init(cbt, true)); @@ -766,7 +766,7 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt) */ if (btree->bulk_load_ok) { btree->bulk_load_ok = false; - __wt_btree_evictable(session, true); + __wt_evict_file_exclusive_off(session); } retry: WT_RET(__cursor_func_init(cbt, true)); diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 6ed70788759..d714dab6000 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -14,6 +14,40 @@ static int __btree_page_sizes(WT_SESSION_IMPL *); static int __btree_preload(WT_SESSION_IMPL *); static int __btree_tree_open_empty(WT_SESSION_IMPL *, bool); +/* + * 
__btree_initialize -- + * Initialize the WT_BTREE structure. + */ +static void +__btree_initialize(WT_BTREE *btree, bool closing) +{ + uint32_t mask; + + /* + * This function exists as a place to discuss how the WT_BTREE structure + * is initialized (or re-initialized, when the object is re-opened). The + * upper-level handle code sets/clears flags in the WT_BTREE structure, + * plus the eviction/cache code reads/writes cache information. The + * latter happens in-between a forced drop and sweep discarding the + * tree (where the tree is still "open" and has pages being evicted from + * the cache), but it's no longer part of the namespace. For all those + * reasons, parts of the WT_BTREE object must persist after it's closed. + */ + if (closing) { + /* + * Closing: clear everything except cache/eviction information + * and one LSM flag. + */ + memset(btree, 0, WT_BTREE_CLEAR_SIZE); + F_CLR(btree, ~(WT_BTREE_LSM_PRIMARY | WT_BTREE_NO_EVICTION)); + } else { + /* Opening: clear everything except the special flags. */ + mask = F_MASK(btree, WT_BTREE_SPECIAL_FLAGS); + memset(btree, 0, sizeof(*btree)); + btree->flags = mask; + } +} + /* * __wt_btree_open -- * Open a Btree. @@ -33,7 +67,10 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) bool creation, forced_salvage, readonly; dhandle = session->dhandle; + btree = S2BT(session); + __btree_initialize(btree, false); + btree->dhandle = session->dhandle; /* Checkpoint files are readonly. */ readonly = dhandle->checkpoint != NULL || @@ -126,6 +163,20 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) } } + /* + * Eviction ignores trees until the handle's open flag is set, configure + * eviction before that happens. + * + * Files that can still be bulk-loaded cannot be evicted. + * Permanently cache-resident files can never be evicted. + * Special operations don't enable eviction. (The underlying commands + * may turn on eviction, but it's their decision.) 
+ */ + if (btree->bulk_load_ok || + F_ISSET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_REBALANCE | + WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) + WT_ERR(__wt_evict_file_exclusive_on(session)); + if (0) { err: WT_TRET(__wt_btree_close(session)); } @@ -155,13 +206,15 @@ __wt_btree_close(WT_SESSION_IMPL *session) /* Close the underlying block manager reference. */ WT_TRET(bm->close(bm, session)); - - btree->bm = NULL; } /* Close the Huffman tree. */ __wt_btree_huffman_close(session); + if (btree->collator_owned && btree->collator->terminate != NULL) + WT_TRET(btree->collator->terminate( + btree->collator, &session->iface)); + /* Destroy locks. */ __wt_rwlock_destroy(session, &btree->ovfl_lock); __wt_spin_destroy(session, &btree->flush_lock); @@ -170,18 +223,7 @@ __wt_btree_close(WT_SESSION_IMPL *session) __wt_free(session, btree->key_format); __wt_free(session, btree->value_format); - if (btree->collator_owned) { - if (btree->collator->terminate != NULL) - WT_TRET(btree->collator->terminate( - btree->collator, &session->iface)); - btree->collator_owned = 0; - } - btree->collator = NULL; - btree->kencryptor = NULL; - - btree->bulk_load_ok = false; - - F_CLR(btree, WT_BTREE_SPECIAL_FLAGS); + __btree_initialize(btree, true); return (ret); } @@ -267,9 +309,9 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) WT_RET(__wt_config_gets(session, cfg, "cache_resident", &cval)); if (cval.val) - F_SET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_NO_EVICTION); + F_SET(btree, WT_BTREE_IN_MEMORY); else - F_CLR(btree, WT_BTREE_IN_MEMORY | WT_BTREE_NO_EVICTION); + F_CLR(btree, WT_BTREE_IN_MEMORY); WT_RET(__wt_config_gets(session, cfg, "ignore_in_memory_cache_size", &cval)); @@ -482,13 +524,10 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, bool creation) /* * Newly created objects can be used for cursor inserts or for bulk * loads; set a flag that's cleared when a row is inserted into the - * tree. 
Objects being bulk-loaded cannot be evicted, we set it - * globally, there's no point in searching empty trees for eviction. + * tree. */ - if (creation) { + if (creation) btree->bulk_load_ok = true; - __wt_btree_evictable(session, false); - } /* * A note about empty trees: the initial tree is a single root page. @@ -580,27 +619,6 @@ __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep) return (0); } -/* - * __wt_btree_evictable -- - * Setup or release a cache-resident tree. - */ -void -__wt_btree_evictable(WT_SESSION_IMPL *session, bool on) -{ - WT_BTREE *btree; - - btree = S2BT(session); - - /* Permanently cache-resident files can never be evicted. */ - if (F_ISSET(btree, WT_BTREE_IN_MEMORY)) - return; - - if (on) - F_CLR(btree, WT_BTREE_NO_EVICTION); - else - F_SET(btree, WT_BTREE_NO_EVICTION); -} - /* * __btree_preload -- * Pre-load internal pages. diff --git a/src/btree/bt_random.c b/src/btree/bt_random.c index 4c7ff861d26..25ede0a09ac 100644 --- a/src/btree/bt_random.c +++ b/src/btree/bt_random.c @@ -178,6 +178,8 @@ __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, bool eviction) WT_REF *current, *descent; uint32_t flags, i, entries, retry; + *refp = NULL; + btree = S2BT(session); current = NULL; retry = 100; @@ -201,16 +203,6 @@ restart: /* current = &btree->root; for (;;) { page = current->page; - /* - * When walking a tree for eviction, an exclusive operation may - * be in progress leaving the root page is not valid. Just give - * up in that case. 
- */ - if (page == NULL) { - WT_ASSERT(session, eviction); - break; - } - if (!WT_PAGE_IS_INTERNAL(page)) break; diff --git a/src/btree/bt_rebalance.c b/src/btree/bt_rebalance.c index 24b4f7bb33d..68848c7c8f5 100644 --- a/src/btree/bt_rebalance.c +++ b/src/btree/bt_rebalance.c @@ -406,12 +406,10 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_BTREE *btree; WT_DECL_RET; WT_REBALANCE_STUFF *rs, _rstuff; - bool evict_reset; WT_UNUSED(cfg); btree = S2BT(session); - evict_reset = false; /* * If the tree has never been written to disk, we're done, rebalance @@ -433,14 +431,6 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) /* Set the internal page tree type. */ rs->type = btree->root.page->type; - /* - * Get exclusive access to the file. (Not required, the only page in the - * cache is the root page, and that cannot be evicted; however, this way - * eviction ignores the tree entirely.) - */ - WT_ERR(__wt_evict_file_exclusive_on(session)); - evict_reset = true; - /* Recursively walk the tree. */ switch (rs->type) { case WT_PAGE_ROW_INT: @@ -471,10 +461,7 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) btree->root.page = rs->root; rs->root = NULL; -err: if (evict_reset) - __wt_evict_file_exclusive_off(session); - - /* Discard any leftover root page we created. */ +err: /* Discard any leftover root page we created. 
*/ if (rs->root != NULL) { __wt_page_modify_clear(session, rs->root); __wt_page_out(session, &rs->root); diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c index fea979cac6e..165f932afb2 100644 --- a/src/btree/bt_slvg.c +++ b/src/btree/bt_slvg.c @@ -166,13 +166,11 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_DECL_RET; WT_STUFF *ss, stuff; uint32_t i, leaf_cnt; - bool evict_reset; WT_UNUSED(cfg); btree = S2BT(session); bm = btree->bm; - evict_reset = false; WT_CLEAR(stuff); ss = &stuff; @@ -183,13 +181,6 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_ERR(__wt_scr_alloc(session, 0, &ss->tmp1)); WT_ERR(__wt_scr_alloc(session, 0, &ss->tmp2)); - /* - * Salvage handles its own page eviction; get exclusive access to the - * file, have eviction ignore the tree entirely. - */ - WT_ERR(__wt_evict_file_exclusive_on(session)); - evict_reset = true; - /* * Step 1: * Inform the underlying block manager that we're salvaging the file. @@ -350,9 +341,6 @@ err: WT_TRET(bm->salvage_end(bm, session)); if (ss->root_ref.page != NULL) __wt_ref_out(session, &ss->root_ref); - if (evict_reset) - __wt_evict_file_exclusive_off(session); - /* Discard the leaf and overflow page memory. */ WT_TRET(__slvg_cleanup(session, ss)); diff --git a/src/btree/bt_vrfy.c b/src/btree/bt_vrfy.c index 05990918215..3c90e580696 100644 --- a/src/btree/bt_vrfy.c +++ b/src/btree/bt_vrfy.c @@ -216,13 +216,11 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) ckpt->raw.data, ckpt->raw.size, root_addr, &root_addr_size, true)); - /* - * Ignore trees with no root page. - * Verify, then discard the checkpoint from the cache. - */ - if (root_addr_size != 0 && - (ret = __wt_btree_tree_open( - session, root_addr, root_addr_size)) == 0) { + /* Skip trees with no root page. 
*/ + if (root_addr_size != 0) { + WT_ERR(__wt_btree_tree_open( + session, root_addr, root_addr_size)); + if (WT_VRFY_DUMP(vs)) WT_ERR(__wt_msg(session, "Root: %s %s", __wt_addr_string(session, @@ -230,14 +228,38 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) __wt_page_type_string( btree->root.page->type))); + __wt_evict_file_exclusive_off(session); + + /* Verify the tree. */ WT_WITH_PAGE_INDEX(session, ret = __verify_tree(session, &btree->root, vs)); + /* + * We have an exclusive lock on the handle, but we're + * swapping root pages in-and-out of that handle, and + * there's a race with eviction entering the tree and + * seeing an invalid root page. Eviction must work on + * trees being verified (else we'd have to do our own + * eviction), lock eviction out whenever we're loading + * a new root page. This loop works because we are + * called with eviction locked out, so we release the + * lock at the top of the loop and re-acquire it here. + */ + WT_TRET(__wt_evict_file_exclusive_on(session)); WT_TRET(__wt_cache_op(session, WT_SYNC_DISCARD)); } /* Unload the checkpoint. */ WT_TRET(bm->checkpoint_unload(bm, session)); + + /* + * We've finished one checkpoint's verification (verification, + * then cache eviction and checkpoint unload): if any errors + * occurred, quit. Done this way because otherwise we'd need + * at least two more state variables on error, one to know if + * we need to discard the tree from the cache and one to know + * if we need to unload the checkpoint. + */ WT_ERR(ret); /* Display the tree shape. */ diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c index ddaa2e5f70b..86484feb7c9 100644 --- a/src/btree/bt_walk.c +++ b/src/btree/bt_walk.c @@ -348,16 +348,19 @@ __tree_walk_internal(WT_SESSION_IMPL *session, /* If no page is active, begin a walk from the start/end of the tree.
*/ if (ref == NULL) { restart: /* - * We can reach here with a NULL or root reference; the release + * We can be here with a NULL or root WT_REF; the page release * function handles them internally, don't complicate this code * by calling them out. */ WT_ERR(__wt_page_release(session, couple, flags)); - couple = couple_orig = ref = &btree->root; - if (ref->page == NULL) - goto done; + /* + * We're not supposed to walk trees without root pages. As this + * has not always been the case, assert to debug that change. + */ + WT_ASSERT(session, btree->root.page != NULL); + couple = couple_orig = ref = &btree->root; initial_descent = true; goto descend; } diff --git a/src/evict/evict_file.c b/src/evict/evict_file.c index 17b038fb003..3bc8fe36e5e 100644 --- a/src/evict/evict_file.c +++ b/src/evict/evict_file.c @@ -15,15 +15,27 @@ int __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) { + WT_BTREE *btree; WT_DECL_RET; WT_PAGE *page; WT_REF *next_ref, *ref; + btree = S2BT(session); + /* - * We need exclusive access to the file -- disable ordinary eviction - * and drain any blocks already queued. + * We need exclusive access to the file, we're about to discard the root + * page. Assert eviction has been locked out. */ - WT_RET(__wt_evict_file_exclusive_on(session)); + WT_ASSERT(session, + F_ISSET(btree, WT_BTREE_NO_EVICTION) || + !F_ISSET(session->dhandle, WT_DHANDLE_OPEN)); + + /* + * We do discard objects without pages in memory. If that's the case, + * we're done. + */ + if (btree->root.page == NULL) + return (0); /* Make sure the oldest transaction ID is up-to-date. */ WT_RET(__wt_txn_update_oldest( @@ -102,7 +114,5 @@ err: /* On error, clear any left-over tree walk. 
*/ session, next_ref, WT_READ_NO_EVICT)); } - __wt_evict_file_exclusive_off(session); - return (ret); } diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index d16594b0816..e59a6c2f2d9 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1431,10 +1431,20 @@ retry: while (slot < max_entries) { if (!F_ISSET(btree, WT_BTREE_NO_EVICTION) && !__wt_spin_trylock(session, &cache->evict_walk_lock)) { if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) { + /* + * Assert the handle has a root page: eviction + * should have been locked out if the tree is + * being discarded or the root page is changing. + * As this has not always been the case, assert + * to debug that change. + */ + WT_ASSERT(session, btree->root.page != NULL); + cache->evict_file_next = dhandle; - WT_WITH_DHANDLE(session, dhandle, ret = - __evict_walk_file(session, queue, - max_entries, &slot)); + WT_WITH_DHANDLE(session, dhandle, + ret = __evict_walk_file( + session, queue, max_entries, &slot)); + WT_ASSERT(session, session->split_gen == 0); } __wt_spin_unlock(session, &cache->evict_walk_lock); diff --git a/src/include/btree.h b/src/include/btree.h index 2aa0e470f59..39971cd2987 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -118,10 +118,6 @@ struct __wt_btree { uint64_t last_recno; /* Column-store last record number */ - WT_REF root; /* Root page reference */ - bool modified; /* If the tree ever modified */ - bool bulk_load_ok; /* Bulk-load is a possibility */ - WT_BM *bm; /* Block manager reference */ u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */ @@ -130,6 +126,28 @@ struct __wt_btree { uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ uint64_t write_gen; /* Write generation */ + enum { + WT_CKPT_OFF, WT_CKPT_PREPARE, WT_CKPT_RUNNING + } checkpointing; /* Checkpoint in progress */ + + /* + * We flush pages from the tree (in order to make checkpoint faster), + * without a high-level lock. To avoid multiple threads flushing at + * the same time, lock the tree. 
+ */ + WT_SPINLOCK flush_lock; /* Lock to flush the tree's pages */ + + bool modified; /* If the tree ever modified */ + bool bulk_load_ok; /* Bulk-load is a possibility */ + + /* + * The tree's cache and eviction information persist after the handle + * is closed (clean cache pages may remain after the tree is closed). + * Be careful clearing the WT_BTREE structure. + */ +#define WT_BTREE_CLEAR_SIZE (offsetof(WT_BTREE, root)) + WT_REF root; /* Root page reference */ + uint64_t bytes_inmem; /* Cache bytes in memory. */ uint64_t bytes_dirty_intl; /* Bytes in dirty internal pages. */ uint64_t bytes_dirty_leaf; /* Bytes in dirty leaf pages. */ @@ -144,17 +162,6 @@ struct __wt_btree { int evict_start_type; /* Start position for eviction walk (see WT_EVICT_WALK_START). */ - enum { - WT_CKPT_OFF, WT_CKPT_PREPARE, WT_CKPT_RUNNING - } checkpointing; /* Checkpoint in progress */ - - /* - * We flush pages from the tree (in order to make checkpoint faster), - * without a high-level lock. To avoid multiple threads flushing at - * the same time, lock the tree. 
- */ - WT_SPINLOCK flush_lock; /* Lock to flush the tree's pages */ - /* Flags values up to 0xff are reserved for WT_DHANDLE_* */ #define WT_BTREE_BULK 0x000100 /* Bulk-load handle */ #define WT_BTREE_IGNORE_CACHE 0x000200 /* Cache-resident object */ diff --git a/src/include/extern.h b/src/include/extern.h index 19ad9a880df..8e86eedf051 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -136,7 +136,6 @@ extern int __wt_btree_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE extern void __wt_root_ref_init(WT_REF *root_ref, WT_PAGE *root, bool is_recno) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_tree_open( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_btree_evictable(WT_SESSION_IMPL *session, bool on) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_btree_huffman_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/session.h b/src/include/session.h index 085f871a34f..f3092dc3c6c 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -151,20 +151,16 @@ struct __wt_session_impl { uint32_t flags; /* - * The split stash memory and hazard information persist past session - * close because they are accessed by threads of control other than the - * thread owning the 
session. - * + * All of the following fields live at the end of the structure so it's + * easier to clear everything but the fields that persist. + */ +#define WT_SESSION_CLEAR_SIZE (offsetof(WT_SESSION_IMPL, rnd)) + + /* * The random number state persists past session close because we don't - * want to repeatedly allocate repeated values for skiplist depth if the + * want to repeatedly use the same values for skiplist depth when the * application isn't caching sessions. - * - * All of these fields live at the end of the structure so it's easier - * to clear everything but the fields that persist. */ -#define WT_SESSION_CLEAR_SIZE(s) \ - (WT_PTRDIFF(&(s)->rnd, s)) - WT_RAND_STATE rnd; /* Random number generation state */ /* Hashed handle reference list array */ @@ -173,6 +169,9 @@ struct __wt_session_impl { TAILQ_HEAD(__tables_hash, __wt_table) *tablehash; /* + * Split stash memory persists past session close because it's accessed + * by threads of control other than the thread owning the session. + * * Splits can "free" memory that may still be in use, and we use a * split generation number to track it, that is, the session stores a * reference to the memory and allocates a split generation; when no @@ -192,6 +191,9 @@ struct __wt_session_impl { /* * Hazard pointers. * + * Hazard information persists past session close because it's accessed + * by threads of control other than the thread owning the session. + * * Use the non-NULL state of the hazard field to know if the session has * previously been initialized. */ diff --git a/src/session/session_api.c b/src/session/session_api.c index d282c5d0c32..3d13287fbe6 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -128,7 +128,7 @@ __session_clear(WT_SESSION_IMPL *session) * * For these reasons, be careful when clearing the session structure. 
*/ - memset(session, 0, WT_SESSION_CLEAR_SIZE(session)); + memset(session, 0, WT_SESSION_CLEAR_SIZE); WT_INIT_LSN(&session->bg_sync_lsn); diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c index ee9bddbfc19..469da21a448 100644 --- a/src/session/session_dhandle.c +++ b/src/session/session_dhandle.c @@ -574,12 +574,13 @@ __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint) checkpoint, NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY)); /* - * Flush any pages in this checkpoint from the cache (we are about to - * re-write the checkpoint which will mean cached pages no longer have - * valid contents). This is especially noticeable with memory mapped - * files, since changes to the underlying file are visible to the in - * memory pages. + * Get exclusive access to the handle and then flush any pages in this + * checkpoint from the cache (we are about to re-write the checkpoint + * which will mean cached pages no longer have valid contents). This + * is especially noticeable with memory mapped files, since changes to + * the underlying file are visible to the in-memory pages. */ + WT_ERR(__wt_evict_file_exclusive_on(session)); WT_ERR(__wt_cache_op(session, WT_SYNC_DISCARD)); /* -- cgit v1.2.1 From 930369ce8dd3fcf43a77dc6c911b0038748228d3 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 28 Feb 2017 17:26:14 -0500 Subject: WT-3193 Close a race between verify opening a handle and eviction visiting it (#3315) * Eviction uses the WT_BTREE.dhandle reference because a WT_BTREE is what's stored in the WT_EVICT_ENTRY structure. 
--- src/btree/bt_handle.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index d714dab6000..3b64581fe1e 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -21,6 +21,7 @@ static int __btree_tree_open_empty(WT_SESSION_IMPL *, bool); static void __btree_initialize(WT_BTREE *btree, bool closing) { + WT_DATA_HANDLE *dhandle; uint32_t mask; /* @@ -35,11 +36,16 @@ __btree_initialize(WT_BTREE *btree, bool closing) */ if (closing) { /* - * Closing: clear everything except cache/eviction information - * and one LSM flag. + * Closing: clear everything except cache/eviction information. + * (The LSM flag is used during cache eviction as an accounting + * modifier, eviction also uses the WT_DATA_HANDLE reference.) */ + dhandle = btree->dhandle; + memset(btree, 0, WT_BTREE_CLEAR_SIZE); F_CLR(btree, ~(WT_BTREE_LSM_PRIMARY | WT_BTREE_NO_EVICTION)); + + btree->dhandle = dhandle; } else { /* Opening: clear everything except the special flags. */ mask = F_MASK(btree, WT_BTREE_SPECIAL_FLAGS); -- cgit v1.2.1 From af29e5e78eed5c80427179024543bd88c5f2549d Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 28 Feb 2017 22:55:30 -0500 Subject: WT-3193 Close a race between verify opening a handle and eviction visiting it (#3316) Leave the WT_BTREE structure mostly untouched until discarding the upper-level data handle, hopefully resolving the problems we've been having with accessing WT_BTREE fields after "closing" the handle. 
--- src/btree/bt_handle.c | 94 +++++++++++++++++++++++++------------------------ src/conn/conn_dhandle.c | 3 +- src/include/btree.h | 69 ++++++++++++++++-------------------- src/include/extern.h | 1 + 4 files changed, 82 insertions(+), 85 deletions(-) diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 3b64581fe1e..d30eee1e282 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -14,46 +14,6 @@ static int __btree_page_sizes(WT_SESSION_IMPL *); static int __btree_preload(WT_SESSION_IMPL *); static int __btree_tree_open_empty(WT_SESSION_IMPL *, bool); -/* - * __btree_initialize -- - * Initialize the WT_BTREE structure. - */ -static void -__btree_initialize(WT_BTREE *btree, bool closing) -{ - WT_DATA_HANDLE *dhandle; - uint32_t mask; - - /* - * This function exists as a place to discuss how the WT_BTREE structure - * is initialized (or re-initialized, when the object is re-opened). The - * upper-level handle code sets/clears flags in the WT_BTREE structure, - * plus the eviction/cache code reads/writes cache information. The - * latter happens in-between a forced drop and sweep discarding the - * tree (where the tree is still "open" and has pages being evicted from - * the cache), but it's no longer part of the namespace. For all those - * reasons, parts of the WT_BTREE object must persist after it's closed. - */ - if (closing) { - /* - * Closing: clear everything except cache/eviction information. - * (The LSM flag is used during cache eviction as an accounting - * modifier, eviction also uses the WT_DATA_HANDLE reference.) - */ - dhandle = btree->dhandle; - - memset(btree, 0, WT_BTREE_CLEAR_SIZE); - F_CLR(btree, ~(WT_BTREE_LSM_PRIMARY | WT_BTREE_NO_EVICTION)); - - btree->dhandle = dhandle; - } else { - /* Opening: clear everything except the special flags. */ - mask = F_MASK(btree, WT_BTREE_SPECIAL_FLAGS); - memset(btree, 0, sizeof(*btree)); - btree->flags = mask; - } -} - /* * __wt_btree_open -- * Open a Btree. 
@@ -68,15 +28,27 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) WT_DATA_HANDLE *dhandle; WT_DECL_RET; size_t root_addr_size; + uint32_t mask; uint8_t root_addr[WT_BTREE_MAX_ADDR_COOKIE]; const char *filename; bool creation, forced_salvage, readonly; - dhandle = session->dhandle; - + /* + * This may be a re-open of an underlying object and we want to clear + * everything. We can't clear the operation flags, however, they're + * set by the connection handle software. + */ btree = S2BT(session); - __btree_initialize(btree, false); - btree->dhandle = session->dhandle; + mask = F_MASK(btree, WT_BTREE_SPECIAL_FLAGS); + memset(btree, 0, sizeof(*btree)); + btree->flags = mask; + + /* + * Set the data handle immediately, our called functions reasonably + * use it. + */ + dhandle = session->dhandle; + btree->dhandle = dhandle; /* Checkpoint files are readonly. */ readonly = dhandle->checkpoint != NULL || @@ -204,7 +176,24 @@ __wt_btree_close(WT_SESSION_IMPL *session) btree = S2BT(session); + /* + * The close process isn't the same as discarding the handle: we might + * re-open the handle, which isn't a big deal, but the backing blocks + * for the handle may not yet have been discarded from the cache, and + * eviction uses WT_BTREE structure elements. Free backing resources + * but leave the rest alone, and we'll discard the structure when we + * discard the data handle. + * + * Handles can be closed multiple times, ignore all but the first. + */ + if (F_ISSET(btree, WT_BTREE_CLOSED)) + return (0); + F_SET(btree, WT_BTREE_CLOSED); + + /* Discard any underlying block manager resources. */ if ((bm = btree->bm) != NULL) { + btree->bm = NULL; + /* Unload the checkpoint, unless it's a special command. */ if (!F_ISSET(btree, WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) @@ -217,6 +206,7 @@ __wt_btree_close(WT_SESSION_IMPL *session) /* Close the Huffman tree. */ __wt_btree_huffman_close(session); + /* Terminate any associated collator. 
*/ if (btree->collator_owned && btree->collator->terminate != NULL) WT_TRET(btree->collator->terminate( btree->collator, &session->iface)); @@ -229,11 +219,23 @@ __wt_btree_close(WT_SESSION_IMPL *session) __wt_free(session, btree->key_format); __wt_free(session, btree->value_format); - __btree_initialize(btree, true); - return (ret); } +/* + * __wt_btree_discard -- + * Discard a Btree. + */ +void +__wt_btree_discard(WT_SESSION_IMPL *session, void **handlep) +{ + WT_BTREE *btree; + + btree = *handlep; + *handlep = NULL; + __wt_overwrite_and_free(session, btree); +} + /* * __btree_conf -- * Configure a WT_BTREE structure. diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 866b8633f71..7b265c372db 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -18,7 +18,7 @@ __conn_dhandle_destroy(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle) __wt_rwlock_destroy(session, &dhandle->rwlock); __wt_free(session, dhandle->name); __wt_free(session, dhandle->checkpoint); - __wt_free(session, dhandle->handle); + __wt_btree_discard(session, &dhandle->handle); __wt_spin_destroy(session, &dhandle->close_lock); __wt_stat_dsrc_discard(session, dhandle); __wt_overwrite_and_free(session, dhandle); @@ -192,6 +192,7 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) } WT_TRET(__wt_btree_close(session)); + F_CLR(btree, WT_BTREE_SPECIAL_FLAGS); /* * If we marked a handle dead it will be closed by sweep, via diff --git a/src/include/btree.h b/src/include/btree.h index 39971cd2987..69ab2070eb9 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -118,6 +118,10 @@ struct __wt_btree { uint64_t last_recno; /* Column-store last record number */ + WT_REF root; /* Root page reference */ + bool modified; /* If the tree ever modified */ + bool bulk_load_ok; /* Bulk-load is a possibility */ + WT_BM *bm; /* Block manager reference */ u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */ @@ -126,6 +130,19 @@ struct __wt_btree { 
uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ uint64_t write_gen; /* Write generation */ + uint64_t bytes_inmem; /* Cache bytes in memory. */ + uint64_t bytes_dirty_intl; /* Bytes in dirty internal pages. */ + uint64_t bytes_dirty_leaf; /* Bytes in dirty leaf pages. */ + + WT_REF *evict_ref; /* Eviction thread's location */ + uint64_t evict_priority; /* Relative priority of cached pages */ + u_int evict_walk_period; /* Skip this many LRU walks */ + u_int evict_walk_saved; /* Saved walk skips for checkpoints */ + u_int evict_walk_skips; /* Number of walks skipped */ + u_int evict_disabled; /* Eviction disabled count */ + volatile uint32_t evict_busy; /* Count of threads in eviction */ + int evict_start_type; /* Start position for eviction walk + (see WT_EVICT_WALK_START). */ enum { WT_CKPT_OFF, WT_CKPT_PREPARE, WT_CKPT_RUNNING } checkpointing; /* Checkpoint in progress */ @@ -137,46 +154,22 @@ struct __wt_btree { */ WT_SPINLOCK flush_lock; /* Lock to flush the tree's pages */ - bool modified; /* If the tree ever modified */ - bool bulk_load_ok; /* Bulk-load is a possibility */ - - /* - * The tree's cache and eviction information persist after the handle - * is closed (clean cache pages may remain after the tree is closed). - * Be careful clearing the WT_BTREE structure. - */ -#define WT_BTREE_CLEAR_SIZE (offsetof(WT_BTREE, root)) - WT_REF root; /* Root page reference */ - - uint64_t bytes_inmem; /* Cache bytes in memory. */ - uint64_t bytes_dirty_intl; /* Bytes in dirty internal pages. */ - uint64_t bytes_dirty_leaf; /* Bytes in dirty leaf pages. 
*/ - - WT_REF *evict_ref; /* Eviction thread's location */ - uint64_t evict_priority; /* Relative priority of cached pages */ - u_int evict_walk_period; /* Skip this many LRU walks */ - u_int evict_walk_saved; /* Saved walk skips for checkpoints */ - u_int evict_walk_skips; /* Number of walks skipped */ - u_int evict_disabled; /* Eviction disabled count */ - volatile uint32_t evict_busy; /* Count of threads in eviction */ - int evict_start_type; /* Start position for eviction walk - (see WT_EVICT_WALK_START). */ - /* Flags values up to 0xff are reserved for WT_DHANDLE_* */ #define WT_BTREE_BULK 0x000100 /* Bulk-load handle */ -#define WT_BTREE_IGNORE_CACHE 0x000200 /* Cache-resident object */ -#define WT_BTREE_IN_MEMORY 0x000400 /* Cache-resident object */ -#define WT_BTREE_LOOKASIDE 0x000800 /* Look-aside table */ -#define WT_BTREE_LSM_PRIMARY 0x001000 /* Handle is current LSM primary */ -#define WT_BTREE_NO_CHECKPOINT 0x002000 /* Disable checkpoints */ -#define WT_BTREE_NO_EVICTION 0x004000 /* Disable eviction */ -#define WT_BTREE_NO_LOGGING 0x008000 /* Disable logging */ -#define WT_BTREE_NO_RECONCILE 0x010000 /* Allow splits, even with no evict */ -#define WT_BTREE_REBALANCE 0x020000 /* Handle is for rebalance */ -#define WT_BTREE_SALVAGE 0x040000 /* Handle is for salvage */ -#define WT_BTREE_SKIP_CKPT 0x080000 /* Handle skipped checkpoint */ -#define WT_BTREE_UPGRADE 0x100000 /* Handle is for upgrade */ -#define WT_BTREE_VERIFY 0x200000 /* Handle is for verify */ +#define WT_BTREE_CLOSED 0x000200 /* Handle closed */ +#define WT_BTREE_IGNORE_CACHE 0x000400 /* Cache-resident object */ +#define WT_BTREE_IN_MEMORY 0x000800 /* Cache-resident object */ +#define WT_BTREE_LOOKASIDE 0x001000 /* Look-aside table */ +#define WT_BTREE_LSM_PRIMARY 0x002000 /* Handle is current LSM primary */ +#define WT_BTREE_NO_CHECKPOINT 0x004000 /* Disable checkpoints */ +#define WT_BTREE_NO_EVICTION 0x008000 /* Disable eviction */ +#define WT_BTREE_NO_LOGGING 0x010000 /* Disable 
logging */ +#define WT_BTREE_NO_RECONCILE 0x020000 /* Allow splits, even with no evict */ +#define WT_BTREE_REBALANCE 0x040000 /* Handle is for rebalance */ +#define WT_BTREE_SALVAGE 0x080000 /* Handle is for salvage */ +#define WT_BTREE_SKIP_CKPT 0x100000 /* Handle skipped checkpoint */ +#define WT_BTREE_UPGRADE 0x200000 /* Handle is for upgrade */ +#define WT_BTREE_VERIFY 0x400000 /* Handle is for verify */ uint32_t flags; }; diff --git a/src/include/extern.h b/src/include/extern.h index 8e86eedf051..07cc3ce9921 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -133,6 +133,7 @@ extern void __wt_free_ref_index(WT_SESSION_IMPL *session, WT_PAGE *page, WT_PAGE extern void __wt_free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_btree_discard(WT_SESSION_IMPL *session, void **handlep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_root_ref_init(WT_REF *root_ref, WT_PAGE *root, bool is_recno) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_tree_open( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -- cgit v1.2.1 From f70d3773671a5a9319900b4aef57bdc1a67afdc0 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 2 Mar 2017 00:36:42 -0500 Subject: WT-3199 bug: eviction assertion failure (#3317) * Don't set 
WT_BTREE_NO_EVICTION in LSM without going through the locking dance. * Change so nothing in the WT_BTREE structure is cleaned up or discarded until that structure is being discarded or re-opened. This doesn't fix any bugs, but I think it's less fragile going forward. --- src/btree/bt_handle.c | 85 ++++++++++++++++++++++++++++++++----------------- src/conn/conn_dhandle.c | 12 ++++--- src/include/btree.i | 14 +++++--- src/include/extern.h | 2 +- src/lsm/lsm_cursor.c | 3 +- src/lsm/lsm_work_unit.c | 5 +-- 6 files changed, 79 insertions(+), 42 deletions(-) diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index d30eee1e282..e8eb37bfb8e 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -14,6 +14,44 @@ static int __btree_page_sizes(WT_SESSION_IMPL *); static int __btree_preload(WT_SESSION_IMPL *); static int __btree_tree_open_empty(WT_SESSION_IMPL *, bool); +/* + * __btree_clear -- + * Clear a Btree, either on handle discard or re-open. + */ +static int +__btree_clear(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + WT_DECL_RET; + + btree = S2BT(session); + + /* + * If the tree hasn't gone through an open/close cycle, there's no + * cleanup to be done. + */ + if (!F_ISSET(btree, WT_BTREE_CLOSED)) + return (0); + + /* Close the Huffman tree. */ + __wt_btree_huffman_close(session); + + /* Terminate any associated collator. */ + if (btree->collator_owned && btree->collator->terminate != NULL) + WT_TRET(btree->collator->terminate( + btree->collator, &session->iface)); + + /* Destroy locks. */ + __wt_rwlock_destroy(session, &btree->ovfl_lock); + __wt_spin_destroy(session, &btree->flush_lock); + + /* Free allocated memory. */ + __wt_free(session, btree->key_format); + __wt_free(session, btree->value_format); + + return (ret); +} + /* * __wt_btree_open -- * Open a Btree. 
@@ -33,21 +71,21 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) const char *filename; bool creation, forced_salvage, readonly; + btree = S2BT(session); + dhandle = session->dhandle; + /* - * This may be a re-open of an underlying object and we want to clear - * everything. We can't clear the operation flags, however, they're - * set by the connection handle software. + * This may be a re-open of an underlying object and we have to clean + * up. We can't clear the operation flags, however, they're set by the + * connection handle software that called us. */ - btree = S2BT(session); + WT_RET(__btree_clear(session)); + mask = F_MASK(btree, WT_BTREE_SPECIAL_FLAGS); memset(btree, 0, sizeof(*btree)); btree->flags = mask; - /* - * Set the data handle immediately, our called functions reasonably - * use it. - */ - dhandle = session->dhandle; + /* Set the data handle first, our called functions reasonably use it. */ btree->dhandle = dhandle; /* Checkpoint files are readonly. */ @@ -203,22 +241,6 @@ __wt_btree_close(WT_SESSION_IMPL *session) WT_TRET(bm->close(bm, session)); } - /* Close the Huffman tree. */ - __wt_btree_huffman_close(session); - - /* Terminate any associated collator. */ - if (btree->collator_owned && btree->collator->terminate != NULL) - WT_TRET(btree->collator->terminate( - btree->collator, &session->iface)); - - /* Destroy locks. */ - __wt_rwlock_destroy(session, &btree->ovfl_lock); - __wt_spin_destroy(session, &btree->flush_lock); - - /* Free allocated memory. */ - __wt_free(session, btree->key_format); - __wt_free(session, btree->value_format); - return (ret); } @@ -226,14 +248,19 @@ __wt_btree_close(WT_SESSION_IMPL *session) * __wt_btree_discard -- * Discard a Btree. 
*/ -void -__wt_btree_discard(WT_SESSION_IMPL *session, void **handlep) +int +__wt_btree_discard(WT_SESSION_IMPL *session) { WT_BTREE *btree; + WT_DECL_RET; + + ret = __btree_clear(session); - btree = *handlep; - *handlep = NULL; + btree = S2BT(session); __wt_overwrite_and_free(session, btree); + session->dhandle->handle = NULL; + + return (ret); } /* diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 7b265c372db..6958b79f10f 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -12,16 +12,20 @@ * __conn_dhandle_destroy -- * Destroy a data handle. */ -static void +static int __conn_dhandle_destroy(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle) { + WT_DECL_RET; + + WT_WITH_DHANDLE(session, dhandle, ret = __wt_btree_discard(session)); + __wt_rwlock_destroy(session, &dhandle->rwlock); __wt_free(session, dhandle->name); __wt_free(session, dhandle->checkpoint); - __wt_btree_discard(session, &dhandle->handle); __wt_spin_destroy(session, &dhandle->close_lock); __wt_stat_dsrc_discard(session, dhandle); __wt_overwrite_and_free(session, dhandle); + return (ret); } /* @@ -76,7 +80,7 @@ __wt_conn_dhandle_alloc( session->dhandle = dhandle; return (0); -err: __conn_dhandle_destroy(session, dhandle); +err: WT_TRET(__conn_dhandle_destroy(session, dhandle)); return (ret); } @@ -604,7 +608,7 @@ __wt_conn_dhandle_discard_single( */ if (ret == 0 || final) { __conn_btree_config_clear(session); - __conn_dhandle_destroy(session, dhandle); + WT_TRET(__conn_dhandle_destroy(session, dhandle)); session->dhandle = NULL; } diff --git a/src/include/btree.i b/src/include/btree.i index 315efa86fa6..6dda2428122 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1549,7 +1549,7 @@ __wt_btree_lsm_over_size(WT_SESSION_IMPL *session, uint64_t maxsize) * __wt_btree_lsm_switch_primary -- * Switch a btree handle to/from the current primary chunk of an LSM tree. 
*/ -static inline void +static inline int __wt_btree_lsm_switch_primary(WT_SESSION_IMPL *session, bool on) { WT_BTREE *btree; @@ -1563,13 +1563,15 @@ __wt_btree_lsm_switch_primary(WT_SESSION_IMPL *session, bool on) cache = S2C(session)->cache; root = btree->root.page; - if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) - F_SET(btree, WT_BTREE_LSM_PRIMARY | WT_BTREE_NO_EVICTION); + if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { + F_SET(btree, WT_BTREE_LSM_PRIMARY); + WT_RET(__wt_evict_file_exclusive_on(session)); + } if (!on && F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { pindex = WT_INTL_INDEX_GET_SAFE(root); if (!F_ISSET(btree, WT_BTREE_NO_EVICTION) || pindex->entries != 1) - return; + return (0); first = pindex->index[0]; /* @@ -1590,8 +1592,10 @@ __wt_btree_lsm_switch_primary(WT_SESSION_IMPL *session, bool on) (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); } - F_CLR(btree, WT_BTREE_LSM_PRIMARY | WT_BTREE_NO_EVICTION); + F_CLR(btree, WT_BTREE_LSM_PRIMARY); + __wt_evict_file_exclusive_off(session); } + return (0); } /* diff --git a/src/include/extern.h b/src/include/extern.h index 07cc3ce9921..d0c9655fafb 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -133,7 +133,7 @@ extern void __wt_free_ref_index(WT_SESSION_IMPL *session, WT_PAGE *page, WT_PAGE extern void __wt_free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_btree_discard(WT_SESSION_IMPL *session, void **handlep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_btree_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_root_ref_init(WT_REF *root_ref, WT_PAGE *root, bool is_recno) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_tree_open( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 60afbc99ade..116740f8f0c 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -699,7 +699,8 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) { if (btree->bulk_load_ok) { btree->bulk_load_ok = false; WT_WITH_BTREE(session, btree, - __wt_btree_lsm_switch_primary(session, true)); + ret = __wt_btree_lsm_switch_primary(session, true)); + WT_ERR(ret); } } diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index 4349acf7b55..c9c350c5ac9 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -383,8 +383,9 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, * forced eviction. */ WT_ERR(__wt_session_get_btree(session, chunk->uri, NULL, NULL, 0)); - __wt_btree_lsm_switch_primary(session, false); - WT_ERR(__wt_session_release_btree(session)); + WT_TRET(__wt_btree_lsm_switch_primary(session, false)); + WT_TRET(__wt_session_release_btree(session)); + WT_ERR(ret); /* Make sure we aren't pinning a transaction ID. 
*/ __wt_txn_release_snapshot(session); -- cgit v1.2.1 From 62f0543765deaf2f11b3c2e78d82940e500f004b Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 2 Mar 2017 21:17:25 -0500 Subject: WT-3203 bulk-load state changes can race (#3318) * The bulk-load state change (where multiple threads of control turn off the possibility of bulk-load in a tree), has always been able to race, but it's potentially dangerous now that turning off bulk-load involves calling `__wt_evict_file_exclusive_off()`. In the current tree, there's a diagnostic-only test that might fail because of this race. * The WT_BTREE_NO_EVICTION flag is no longer set other than through the __wt_evict_file_exclusive_on/off functions; remove that flag and use the WT_BTREE.evict_disabled counter by itself. --- src/btree/bt_cursor.c | 40 ++++++++++++++++++++++++---------------- src/btree/bt_handle.c | 2 +- src/btree/bt_read.c | 2 +- src/evict/evict_file.c | 2 +- src/evict/evict_lru.c | 44 +++++++++++++------------------------------- src/include/btree.h | 20 ++++++++++---------- src/include/btree.i | 7 +++---- 7 files changed, 53 insertions(+), 64 deletions(-) diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 6a48c5f752b..4634059589b 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -54,6 +54,26 @@ __cursor_size_chk(WT_SESSION_IMPL *session, WT_ITEM *kv) return (0); } +/* + * __cursor_disable_bulk -- + * Disable bulk loads into a tree. + */ +static inline void +__cursor_disable_bulk(WT_SESSION_IMPL *session, WT_BTREE *btree) +{ + /* + * Once a tree is no longer empty, eviction should pay attention to it, + * and it's no longer possible to bulk-load into it. + * + * We use a compare-and-swap here to avoid races among the first + * inserts into a tree. Eviction is disabled when an empty tree is + * opened, it must only be enabled once. 
+ */ + if (btree->bulk_load_ok && + __wt_atomic_cas8(&btree->bulk_load_ok, 1, 0)) + __wt_evict_file_exclusive_off(session); +} + /* * __cursor_fix_implicit -- * Return if search went past the end of the tree. @@ -506,14 +526,8 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_RET(__cursor_size_chk(session, &cursor->key)); WT_RET(__cursor_size_chk(session, &cursor->value)); - /* - * The tree is no longer empty: eviction should pay attention to it, - * and it's no longer possible to bulk-load into it. - */ - if (btree->bulk_load_ok) { - btree->bulk_load_ok = false; - __wt_evict_file_exclusive_off(session); - } + /* It's no longer possible to bulk-load into the tree. */ + __cursor_disable_bulk(session, btree); retry: WT_RET(__cursor_func_init(cbt, true)); @@ -760,14 +774,8 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_RET(__cursor_size_chk(session, &cursor->key)); WT_RET(__cursor_size_chk(session, &cursor->value)); - /* - * The tree is no longer empty: eviction should pay attention to it, - * and it's no longer possible to bulk-load into it. - */ - if (btree->bulk_load_ok) { - btree->bulk_load_ok = false; - __wt_evict_file_exclusive_off(session); - } + /* It's no longer possible to bulk-load into the tree. */ + __cursor_disable_bulk(session, btree); retry: WT_RET(__cursor_func_init(cbt, true)); diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index e8eb37bfb8e..ff199eb1e0e 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -562,7 +562,7 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, bool creation) * tree. */ if (creation) - btree->bulk_load_ok = true; + btree->bulk_load_ok = 1; /* * A note about empty trees: the initial tree is a single root page. 
diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index 39f9e1159cb..e87ddc082f2 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -590,7 +590,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags */ if (LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(session, WT_SESSION_NO_EVICTION) || - (F_ISSET(btree, WT_BTREE_NO_EVICTION) && + (btree->evict_disabled > 0 && !F_ISSET(btree, WT_BTREE_NO_RECONCILE))) goto skip_evict; diff --git a/src/evict/evict_file.c b/src/evict/evict_file.c index 3bc8fe36e5e..3d8f4a61ca7 100644 --- a/src/evict/evict_file.c +++ b/src/evict/evict_file.c @@ -27,7 +27,7 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) * page. Assert eviction has been locked out. */ WT_ASSERT(session, - F_ISSET(btree, WT_BTREE_NO_EVICTION) || + btree->evict_disabled > 0 || !F_ISSET(session->dhandle, WT_DHANDLE_OPEN)); /* diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index e59a6c2f2d9..6863533acfb 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -824,31 +824,19 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) btree = S2BT(session); cache = S2C(session)->cache; - /* - * Hold the walk lock to set the no-eviction flag. - * - * The no-eviction flag can be set permanently, in which case we never - * increment the no-eviction count. - */ + /* Hold the walk lock to turn off eviction. */ __wt_spin_lock(session, &cache->evict_walk_lock); - if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) { - if (btree->evict_disabled != 0) - ++btree->evict_disabled; + if (++btree->evict_disabled > 1) { __wt_spin_unlock(session, &cache->evict_walk_lock); return (0); } - ++btree->evict_disabled; /* * Ensure no new pages from the file will be queued for eviction after - * this point. + * this point, then clear any existing LRU eviction walk for the file. */ - F_SET(btree, WT_BTREE_NO_EVICTION); (void)__wt_atomic_addv32(&cache->pass_intr, 1); - - /* Clear any existing LRU eviction walk for the file. 
*/ - WT_WITH_PASS_LOCK(session, - ret = __evict_clear_walk(session)); + WT_WITH_PASS_LOCK(session, ret = __evict_clear_walk(session)); (void)__wt_atomic_subv32(&cache->pass_intr, 1); WT_ERR(ret); @@ -879,7 +867,6 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) if (0) { err: --btree->evict_disabled; - F_CLR(btree, WT_BTREE_NO_EVICTION); } __wt_spin_unlock(session, &cache->evict_walk_lock); return (ret); @@ -904,16 +891,11 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) */ WT_DIAGNOSTIC_YIELD; - WT_ASSERT(session, - btree->evict_ref == NULL && F_ISSET(btree, WT_BTREE_NO_EVICTION)); - - /* - * The no-eviction flag can be set permanently, in which case we never - * increment the no-eviction count. - */ + /* Hold the walk lock to turn on eviction. */ __wt_spin_lock(session, &cache->evict_walk_lock); - if (btree->evict_disabled > 0 && --btree->evict_disabled == 0) - F_CLR(btree, WT_BTREE_NO_EVICTION); + WT_ASSERT(session, + btree->evict_ref == NULL && btree->evict_disabled > 0); + --btree->evict_disabled; __wt_spin_unlock(session, &cache->evict_walk_lock); } @@ -1372,7 +1354,7 @@ retry: while (slot < max_entries) { /* Skip files that don't allow eviction. */ btree = dhandle->handle; - if (F_ISSET(btree, WT_BTREE_NO_EVICTION)) + if (btree->evict_disabled > 0) continue; /* @@ -1428,9 +1410,9 @@ retry: while (slot < max_entries) { * the tree's current eviction point, and part of the process is * waiting on this thread to acknowledge that action. 
*/ - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION) && + if (btree->evict_disabled == 0 && !__wt_spin_trylock(session, &cache->evict_walk_lock)) { - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) { + if (btree->evict_disabled == 0) { /* * Assert the handle has a root page: eviction * should have been locked out if the tree is @@ -2249,7 +2231,7 @@ __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) page = ref->page; if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) || - F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION)) + S2BT(session)->evict_disabled > 0) return (false); /* Append to the urgent queue if we can. */ @@ -2259,7 +2241,7 @@ __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) __wt_spin_lock(session, &cache->evict_queue_lock); if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) || - F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION)) + S2BT(session)->evict_disabled > 0) goto done; __wt_spin_lock(session, &urgent_queue->evict_lock); diff --git a/src/include/btree.h b/src/include/btree.h index 69ab2070eb9..fc7cd352883 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -120,7 +120,8 @@ struct __wt_btree { WT_REF root; /* Root page reference */ bool modified; /* If the tree ever modified */ - bool bulk_load_ok; /* Bulk-load is a possibility */ + uint8_t bulk_load_ok; /* Bulk-load is a possibility + (want a bool but needs atomic cas) */ WT_BM *bm; /* Block manager reference */ u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */ @@ -139,7 +140,7 @@ struct __wt_btree { u_int evict_walk_period; /* Skip this many LRU walks */ u_int evict_walk_saved; /* Saved walk skips for checkpoints */ u_int evict_walk_skips; /* Number of walks skipped */ - u_int evict_disabled; /* Eviction disabled count */ + int evict_disabled; /* Eviction disabled count */ volatile uint32_t evict_busy; /* Count of threads in eviction */ int evict_start_type; /* Start position for eviction walk (see WT_EVICT_WALK_START). 
*/ @@ -162,14 +163,13 @@ struct __wt_btree { #define WT_BTREE_LOOKASIDE 0x001000 /* Look-aside table */ #define WT_BTREE_LSM_PRIMARY 0x002000 /* Handle is current LSM primary */ #define WT_BTREE_NO_CHECKPOINT 0x004000 /* Disable checkpoints */ -#define WT_BTREE_NO_EVICTION 0x008000 /* Disable eviction */ -#define WT_BTREE_NO_LOGGING 0x010000 /* Disable logging */ -#define WT_BTREE_NO_RECONCILE 0x020000 /* Allow splits, even with no evict */ -#define WT_BTREE_REBALANCE 0x040000 /* Handle is for rebalance */ -#define WT_BTREE_SALVAGE 0x080000 /* Handle is for salvage */ -#define WT_BTREE_SKIP_CKPT 0x100000 /* Handle skipped checkpoint */ -#define WT_BTREE_UPGRADE 0x200000 /* Handle is for upgrade */ -#define WT_BTREE_VERIFY 0x400000 /* Handle is for verify */ +#define WT_BTREE_NO_LOGGING 0x008000 /* Disable logging */ +#define WT_BTREE_NO_RECONCILE 0x010000 /* Allow splits, even with no evict */ +#define WT_BTREE_REBALANCE 0x020000 /* Handle is for rebalance */ +#define WT_BTREE_SALVAGE 0x040000 /* Handle is for salvage */ +#define WT_BTREE_SKIP_CKPT 0x080000 /* Handle skipped checkpoint */ +#define WT_BTREE_UPGRADE 0x100000 /* Handle is for upgrade */ +#define WT_BTREE_VERIFY 0x200000 /* Handle is for verify */ uint32_t flags; }; diff --git a/src/include/btree.i b/src/include/btree.i index 6dda2428122..cec6f67e9bd 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1401,7 +1401,7 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) if (page->read_gen != WT_READGEN_OLDEST || LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(session, WT_SESSION_NO_EVICTION) || - F_ISSET(btree, WT_BTREE_NO_EVICTION) || + btree->evict_disabled > 0 || !__wt_page_can_evict(session, ref, NULL)) return (__wt_hazard_clear(session, ref)); @@ -1521,7 +1521,7 @@ __wt_btree_lsm_over_size(WT_SESSION_IMPL *session, uint64_t maxsize) return (false); /* A tree that can be evicted always requires a switch. 
*/ - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) + if (btree->evict_disabled == 0) return (true); /* Check for a tree with a single leaf page. */ @@ -1569,8 +1569,7 @@ __wt_btree_lsm_switch_primary(WT_SESSION_IMPL *session, bool on) } if (!on && F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { pindex = WT_INTL_INDEX_GET_SAFE(root); - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION) || - pindex->entries != 1) + if (btree->evict_disabled == 0 || pindex->entries != 1) return (0); first = pindex->index[0]; -- cgit v1.2.1 From 9e3f71ef55b906b25c63e1000cf39949a587550d Mon Sep 17 00:00:00 2001 From: sueloverso Date: Wed, 8 Mar 2017 14:39:13 -0500 Subject: WT-3213 Only error if fixed-length and long_running_txn is set. --- test/format/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/format/config.c b/test/format/config.c index 958ad6b7a99..cd9856d641e 100644 --- a/test/format/config.c +++ b/test/format/config.c @@ -462,7 +462,7 @@ config_lrt(void) * stores. */ if (g.type == FIX) { - if (config_is_perm("long_running_txn")) + if (config_is_perm("long_running_txn") && g.c_long_running_txn) testutil_die(EINVAL, "long_running_txn not supported with fixed-length " "column store"); -- cgit v1.2.1 From a67019791436f1dfaca9cffda17e2fa9935296db Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Fri, 10 Mar 2017 14:00:13 +1100 Subject: WT-3207 Don't hold clean handles during checkpoints. (#3319) Previously, we gathered handles, then started a transaction, then figured out which handles were clean and released them. However, * checkpoints were keeping every handle in both its handle list and in the meta_tracking list because the *_apply_all functions were saving all handles when meta_tracking was active; and * we had acquired exclusive locks on checkpoints to be dropped before determining that we could skip a checkpoint in a clean tree. These locks blocked drops (among other things) until the checkpoint completed. 
The solution here is to first start the transaction, then check for clean handles as checkpoint visits them. However, this has to cope with races where a handle changes state in between the transaction starting and getting the handle (e.g., table creates, bulk loads completing). --- src/conn/conn_dhandle.c | 12 +- src/include/btree.h | 1 - src/include/txn.i | 3 +- src/meta/meta_apply.c | 6 +- src/meta/meta_table.c | 3 - src/session/session_dhandle.c | 17 +- src/txn/txn_ckpt.c | 413 +++++++++++++++++++++++------------------- 7 files changed, 247 insertions(+), 208 deletions(-) diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 6958b79f10f..25795a8d309 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -400,10 +400,7 @@ __conn_btree_apply_internal(WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle, return (ret == EBUSY ? 0 : ret); WT_SAVE_DHANDLE(session, ret = file_func(session, cfg)); - if (WT_META_TRACKING(session)) - WT_TRET(__wt_meta_track_handle_lock(session, false)); - else - WT_TRET(__wt_session_release_btree(session)); + WT_TRET(__wt_session_release_btree(session)); return (ret); } @@ -497,7 +494,12 @@ __wt_conn_dhandle_close_all( session->dhandle = dhandle; - /* Lock the handle exclusively. */ + /* + * Lock the handle exclusively. If this is part of + * schema-changing operation (indicated by metadata tracking + * being enabled), hold the lock for the duration of the + * operation. 
+ */ WT_ERR(__wt_session_get_btree(session, dhandle->name, dhandle->checkpoint, NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY)); diff --git a/src/include/btree.h b/src/include/btree.h index fc7cd352883..857dc6694c5 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -127,7 +127,6 @@ struct __wt_btree { u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */ uint64_t checkpoint_gen; /* Checkpoint generation */ - bool include_checkpoint_txn;/* ID checks include checkpoint */ uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ uint64_t write_gen; /* Write generation */ diff --git a/src/include/txn.i b/src/include/txn.i index 0cc4a6f8439..314c948e4d1 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -125,7 +125,8 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session) * minimum of it with the oldest ID, which is what we want. */ oldest_id = txn_global->oldest_id; - include_checkpoint_txn = btree == NULL || btree->include_checkpoint_txn; + include_checkpoint_txn = btree == NULL || + btree->checkpoint_gen != txn_global->checkpoint_gen; WT_READ_BARRIER(); checkpoint_pinned = txn_global->checkpoint_pinned; diff --git a/src/meta/meta_apply.c b/src/meta/meta_apply.c index fb483c21dd9..dc93180a5e5 100644 --- a/src/meta/meta_apply.c +++ b/src/meta/meta_apply.c @@ -45,11 +45,7 @@ __meta_btree_apply(WT_SESSION_IMPL *session, WT_CURSOR *cursor, session, uri, NULL, NULL, 0)) != 0) return (ret == EBUSY ? 
0 : ret); WT_SAVE_DHANDLE(session, ret = file_func(session, cfg)); - if (WT_META_TRACKING(session)) - WT_TRET(__wt_meta_track_handle_lock( - session, false)); - else - WT_TRET(__wt_session_release_btree(session)); + WT_TRET(__wt_session_release_btree(session)); WT_RET(ret); } WT_RET_NOTFOUND_OK(ret); diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c index 4f60728b2d2..aca69d0e6a2 100644 --- a/src/meta/meta_table.c +++ b/src/meta/meta_table.c @@ -68,9 +68,6 @@ __wt_metadata_cursor_open( if (F_ISSET(btree, WT_BTREE_NO_LOGGING)) F_CLR(btree, WT_BTREE_NO_LOGGING); - /* The metadata file always uses checkpoint IDs in visibility checks. */ - btree->include_checkpoint_txn = true; - return (0); } diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c index 469da21a448..7c96dd8b8a8 100644 --- a/src/session/session_dhandle.c +++ b/src/session/session_dhandle.c @@ -560,7 +560,7 @@ __wt_session_get_btree(WT_SESSION_IMPL *session, int __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint) { - WT_DATA_HANDLE *dhandle, *saved_dhandle; + WT_DATA_HANDLE *saved_dhandle; WT_DECL_RET; WT_ASSERT(session, WT_META_TRACKING(session)); @@ -568,10 +568,15 @@ __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint) /* * Get the checkpoint handle exclusive, so no one else can access it - * while we are creating the new checkpoint. + * while we are creating the new checkpoint. Hold the lock until the + * checkpoint completes. 
*/ WT_ERR(__wt_session_get_btree(session, saved_dhandle->name, checkpoint, NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY)); + if ((ret = __wt_meta_track_handle_lock(session, false)) != 0) { + WT_TRET(__wt_session_release_btree(session)); + goto err; + } /* * Get exclusive access to the handle and then flush any pages in this @@ -587,13 +592,9 @@ __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint) * We lock checkpoint handles that we are overwriting, so the handle * must be closed when we release it. */ - dhandle = session->dhandle; - F_SET(dhandle, WT_DHANDLE_DISCARD); + F_SET(session->dhandle, WT_DHANDLE_DISCARD); - WT_ERR(__wt_meta_track_handle_lock(session, false)); - - /* Restore the original btree in the session. */ + /* Restore the original data handle in the session. */ err: session->dhandle = saved_dhandle; - return (ret); } diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 3261c8089f4..d6f0e45c042 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -8,9 +8,9 @@ #include "wt_internal.h" -static int __checkpoint_lock_tree( - WT_SESSION_IMPL *, bool, bool, const char *[]); -static int __checkpoint_mark_deletes(WT_SESSION_IMPL *, const char *[]); +static int __checkpoint_lock_dirty_tree( + WT_SESSION_IMPL *, bool, bool, bool, const char *[]); +static int __checkpoint_mark_skip(WT_SESSION_IMPL *, WT_CKPT *, bool); static int __checkpoint_presync(WT_SESSION_IMPL *, const char *[]); static int __checkpoint_tree_helper(WT_SESSION_IMPL *, const char *[]); @@ -89,6 +89,33 @@ err: WT_TRET(__wt_metadata_cursor_release(session, &cursor)); return (ret); } +/* + * __checkpoint_update_generation -- + * Update the checkpoint generation of the current tree. + * + * This indicates that the tree will not be visited again by the current + * checkpoint. 
+ */ +static void +__checkpoint_update_generation(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + + btree = S2BT(session); + + /* + * Updates to the metadata are made by the checkpoint transaction, so + * the metadata tree's checkpoint generation should never be updated. + */ + if (WT_IS_METADATA(session->dhandle)) + return; + + WT_PUBLISH(btree->checkpoint_gen, + S2C(session)->txn_global.checkpoint_gen); + WT_STAT_DATA_SET(session, + btree_checkpoint_generation, btree->checkpoint_gen); +} + /* * __checkpoint_apply_all -- * Apply an operation to all files involved in a checkpoint. @@ -239,22 +266,95 @@ int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) { WT_BTREE *btree; + WT_CONFIG_ITEM cval; + WT_CURSOR *meta_cursor; WT_DECL_RET; const char *name; + bool force, metadata_race; + + btree = S2BT(session); + + /* Find out if we have to force a checkpoint. */ + force = false; + WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval)); + force = cval.val != 0; + if (!force) { + WT_RET(__wt_config_gets_def(session, cfg, "name", 0, &cval)); + force = cval.len != 0; + } /* Should not be called with anything other than a file object. */ WT_ASSERT(session, session->dhandle->checkpoint == NULL); WT_ASSERT(session, WT_PREFIX_MATCH(session->dhandle->name, "file:")); /* Skip files that are never involved in a checkpoint. */ - if (F_ISSET(S2BT(session), WT_BTREE_NO_CHECKPOINT)) + if (F_ISSET(btree, WT_BTREE_NO_CHECKPOINT)) + return (0); + + /* + * We may have raced between starting the checkpoint transaction and + * some operation completing on the handle that updated the metadata + * (e.g., closing a bulk load cursor). All such operations either have + * exclusive access to the handle or hold the schema lock. We are now + * holding the schema lock and have an open btree handle, so if we + * can't update the metadata, then there has been some state change + * invisible to the checkpoint transaction. 
Skip checkpointing such + * files: they must have a recent durable point. + */ + if (!WT_IS_METADATA(session->dhandle)) { + WT_ASSERT(session, !F_ISSET(&session->txn, WT_TXN_ERROR)); + WT_RET(__wt_metadata_cursor(session, &meta_cursor)); + meta_cursor->set_key(meta_cursor, session->dhandle->name); + ret = __wt_curfile_update_check(meta_cursor); + if (ret == WT_ROLLBACK) { + metadata_race = true; + ret = 0; + } else + metadata_race = false; + WT_TRET(__wt_metadata_cursor_release(session, &meta_cursor)); + WT_RET(ret); + if (metadata_race) { + /* + * The conflict registers as a rollback error: that can + * safely be skipped here. + */ + F_CLR(&session->txn, WT_TXN_ERROR); + if (force) + WT_RET_MSG(session, EBUSY, + "forced or named checkpoint raced with " + "a metadata update"); + __wt_verbose(session, WT_VERB_CHECKPOINT, + "skipped checkpoint of %s with metadata conflict", + session->dhandle->name); + F_SET(btree, WT_BTREE_SKIP_CKPT); + __checkpoint_update_generation(session); + return (0); + } + } + + /* + * Decide whether the tree needs to be included in the checkpoint and + * if so, acquire the necessary locks. + */ + WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_dirty_tree( + session, true, force, true, cfg)); + WT_RET(ret); + if (F_ISSET(btree, WT_BTREE_SKIP_CKPT)) { + __checkpoint_update_generation(session); return (0); + } - /* Make sure there is space for the next entry. */ + /* + * Make sure there is space for the new entry: do this before getting + * the handle to avoid cleanup if we can't allocate the memory. + */ WT_RET(__wt_realloc_def(session, &session->ckpt_handle_allocated, session->ckpt_handle_next + 1, &session->ckpt_handle)); - /* Not strictly necessary, but cleaner to clear the current handle. */ + /* + * The current tree will be included: get it again because the handle + * we have is only valid for the duration of this function. 
+ */ name = session->dhandle->name; session->dhandle = NULL; @@ -266,48 +366,12 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) * with eviction and we don't want to unfairly penalize (or promote) * eviction in trees due to checkpoints. */ - btree = S2BT(session); btree->evict_walk_saved = btree->evict_walk_period; - WT_SAVE_DHANDLE(session, - ret = __checkpoint_lock_tree(session, true, true, cfg)); - if (ret != 0) { - WT_TRET(__wt_session_release_btree(session)); - return (ret); - } - - /* - * Flag that the handle is part of a checkpoint for the purposes - * of transaction visibility checks. - */ - WT_PUBLISH(btree->include_checkpoint_txn, true); - session->ckpt_handle[session->ckpt_handle_next++] = session->dhandle; return (0); } -/* - * __checkpoint_update_generation -- - * Update the checkpoint generation of the current tree. - * - * This indicates that the tree will not be visited again by the current - * checkpoint. - */ -static void -__checkpoint_update_generation(WT_SESSION_IMPL *session) -{ - WT_BTREE *btree; - - btree = S2BT(session); - if (!WT_IS_METADATA(session->dhandle)) - WT_PUBLISH(btree->include_checkpoint_txn, false); - - WT_PUBLISH(btree->checkpoint_gen, - S2C(session)->txn_global.checkpoint_gen); - WT_STAT_DATA_SET(session, - btree_checkpoint_generation, btree->checkpoint_gen); -} - /* * __checkpoint_reduce_dirty_cache -- * Release clean trees from the list cached for checkpoints. @@ -433,36 +497,6 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session) WT_STAT_CONN_SET(session, txn_checkpoint_scrub_time, total_ms); } -/* - * __checkpoint_release_clean_trees -- - * Release clean trees from the list cached for checkpoints. 
- */ -static int -__checkpoint_release_clean_trees(WT_SESSION_IMPL *session) -{ - WT_BTREE *btree; - WT_DATA_HANDLE *dhandle; - WT_DECL_RET; - u_int i; - - for (i = 0; i < session->ckpt_handle_next; i++) { - dhandle = session->ckpt_handle[i]; - btree = dhandle->handle; - if (!F_ISSET(btree, WT_BTREE_SKIP_CKPT)) - continue; - __wt_meta_ckptlist_free(session, btree->ckpt); - btree->ckpt = NULL; - WT_WITH_DHANDLE(session, dhandle, - __checkpoint_update_generation(session)); - session->ckpt_handle[i] = NULL; - WT_WITH_DHANDLE(session, dhandle, - ret = __wt_session_release_btree(session)); - WT_RET(ret); - } - - return (0); -} - /* * __checkpoint_stats -- * Update checkpoint timer stats. @@ -535,6 +569,96 @@ __checkpoint_fail_reset(WT_SESSION_IMPL *session) S2BT(session)->ckpt = NULL; } +/* + * __checkpoint_prepare -- + * Start the transaction for a checkpoint and gather handles. + */ +static int +__checkpoint_prepare(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_TXN *txn; + WT_TXN_GLOBAL *txn_global; + WT_TXN_STATE *txn_state; + const char *txn_cfg[] = { WT_CONFIG_BASE(session, + WT_SESSION_begin_transaction), "isolation=snapshot", NULL }; + + conn = S2C(session); + txn = &session->txn; + txn_global = &conn->txn_global; + txn_state = WT_SESSION_TXN_STATE(session); + + /* + * Start a snapshot transaction for the checkpoint. + * + * Note: we don't go through the public API calls because they have + * side effects on cursors, which applications can hold open across + * calls to checkpoint. + */ + WT_RET(__wt_txn_begin(session, txn_cfg)); + + /* Ensure a transaction ID is allocated prior to sharing it globally */ + WT_RET(__wt_txn_id_check(session)); + + /* + * Mark the connection as clean. If some data gets modified after + * generating checkpoint transaction id, connection will be reset to + * dirty when reconciliation marks the btree dirty on encountering the + * dirty page. 
+ */ + conn->modified = false; + + /* + * Save the checkpoint session ID. + * + * We never do checkpoints in the default session (with id zero). + */ + WT_ASSERT(session, session->id != 0 && txn_global->checkpoint_id == 0); + txn_global->checkpoint_id = session->id; + + /* + * Remove the checkpoint transaction from the global table. + * + * This allows ordinary visibility checks to move forward because + * checkpoints often take a long time and only write to the metadata. + */ + __wt_writelock(session, &txn_global->scan_rwlock); + txn_global->checkpoint_txnid = txn->id; + txn_global->checkpoint_pinned = WT_MIN(txn->id, txn->snap_min); + + /* + * Sanity check that the oldest ID hasn't moved on before we have + * cleared our entry. + */ + WT_ASSERT(session, + WT_TXNID_LE(txn_global->oldest_id, txn_state->id) && + WT_TXNID_LE(txn_global->oldest_id, txn_state->pinned_id)); + + /* + * Clear our entry from the global transaction session table. Any + * operation that needs to know about the ID for this checkpoint will + * consider the checkpoint ID in the global structure. Most operations + * can safely ignore the checkpoint ID (see the visible all check for + * details). + */ + txn_state->id = txn_state->pinned_id = + txn_state->metadata_pinned = WT_TXN_NONE; + __wt_writeunlock(session, &txn_global->scan_rwlock); + + /* + * Get a list of handles we want to flush; for named checkpoints this + * may pull closed objects into the session cache. + * + * First, gather all handles, then start the checkpoint transaction, + * then release any clean handles. + */ + WT_ASSERT(session, session->ckpt_handle_next == 0); + WT_WITH_TABLE_READ_LOCK(session, ret = __checkpoint_apply_all( + session, cfg, __wt_checkpoint_get_handles, NULL)); + return (ret); +} + /* * __txn_checkpoint -- * Checkpoint a database or a list of objects in the database. 
@@ -550,19 +674,15 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_TXN *txn; WT_TXN_GLOBAL *txn_global; WT_TXN_ISOLATION saved_isolation; - WT_TXN_STATE *txn_state; void *saved_meta_next; u_int i; uint64_t fsync_duration_usecs; bool failed, full, idle, logging, tracking; - const char *txn_cfg[] = { WT_CONFIG_BASE(session, - WT_SESSION_begin_transaction), "isolation=snapshot", NULL }; conn = S2C(session); cache = conn->cache; txn = &session->txn; txn_global = &conn->txn_global; - txn_state = WT_SESSION_TXN_STATE(session); saved_isolation = session->isolation; full = idle = logging = tracking = false; @@ -631,86 +751,24 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) tracking = true; /* - * Get a list of handles we want to flush; for named checkpoints this - * may pull closed objects into the session cache. - * * We want to skip checkpointing clean handles whenever possible. That * is, when the checkpoint is not named or forced. However, we need to * take care about ordering with respect to the checkpoint transaction. * - * If we skip clean handles before starting the transaction, the + * We can't skip clean handles before starting the transaction or the * checkpoint can miss updates in trees that become dirty as the * checkpoint is starting. If we wait until the transaction has * started before locking a handle, there could be a metadata-changing * operation in between (e.g., salvage) that will cause a write * conflict when the checkpoint goes to write the metadata. * - * First, gather all handles, then start the checkpoint transaction, - * then release any clean handles. + * Hold the schema lock while starting the transaction and gathering + * handles so the set we get is complete and correct. 
*/ - WT_ASSERT(session, session->ckpt_handle_next == 0); - WT_WITH_SCHEMA_LOCK(session, - WT_WITH_TABLE_READ_LOCK(session, - ret = __checkpoint_apply_all( - session, cfg, __wt_checkpoint_get_handles, NULL))); + WT_WITH_SCHEMA_LOCK(session, ret = __checkpoint_prepare(session, cfg)); WT_ERR(ret); - /* - * Start a snapshot transaction for the checkpoint. - * - * Note: we don't go through the public API calls because they have - * side effects on cursors, which applications can hold open across - * calls to checkpoint. - */ - WT_ERR(__wt_txn_begin(session, txn_cfg)); - - /* Ensure a transaction ID is allocated prior to sharing it globally */ - WT_ERR(__wt_txn_id_check(session)); - - /* - * Mark the connection as clean. If some data gets modified after - * generating checkpoint transaction id, connection will be reset to - * dirty when reconciliation marks the btree dirty on encountering the - * dirty page. - */ - conn->modified = false; - - /* - * Save the checkpoint session ID. - * - * We never do checkpoints in the default session (with id zero). - */ - WT_ASSERT(session, session->id != 0 && txn_global->checkpoint_id == 0); - txn_global->checkpoint_id = session->id; - - /* - * Remove the checkpoint transaction from the global table. - * - * This allows ordinary visibility checks to move forward because - * checkpoints often take a long time and only write to the metadata. - */ - __wt_writelock(session, &txn_global->scan_rwlock); - txn_global->checkpoint_txnid = txn->id; - txn_global->checkpoint_pinned = WT_MIN(txn->id, txn->snap_min); - - /* - * Sanity check that the oldest ID hasn't moved on before we have - * cleared our entry. - */ - WT_ASSERT(session, - WT_TXNID_LE(txn_global->oldest_id, txn_state->id) && - WT_TXNID_LE(txn_global->oldest_id, txn_state->pinned_id)); - - /* - * Clear our entry from the global transaction session table. Any - * operation that needs to know about the ID for this checkpoint will - * consider the checkpoint ID in the global structure. 
Most operations - * can safely ignore the checkpoint ID (see the visible all check for - * details). - */ - txn_state->id = txn_state->pinned_id = - txn_state->metadata_pinned = WT_TXN_NONE; - __wt_writeunlock(session, &txn_global->scan_rwlock); + WT_ASSERT(session, txn->isolation == WT_ISO_SNAPSHOT); /* * Unblock updates -- we can figure out that any updates to clean pages @@ -719,16 +777,6 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) cache->eviction_scrub_limit = 0.0; WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target, 0); - /* - * Mark old checkpoints that are being deleted and figure out which - * trees we can skip in this checkpoint. - * - * Release clean trees. Any updates made after this point will not - * visible to the checkpoint transaction. - */ - WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_mark_deletes)); - WT_ERR(__checkpoint_release_clean_trees(session)); - /* Tell logging that we have started a database checkpoint. */ if (full && logging) WT_ERR(__wt_txn_checkpoint_log( @@ -1065,12 +1113,13 @@ __drop_to(WT_CKPT *ckptbase, const char *name, size_t len) } /* - * __checkpoint_lock_tree -- - * Acquire the locks required to checkpoint a tree. + * __checkpoint_lock_dirty_tree -- + * Decide whether the tree needs to be included in the checkpoint and if + * so, acquire the necessary locks. */ static int -__checkpoint_lock_tree(WT_SESSION_IMPL *session, - bool is_checkpoint, bool need_tracking, const char *cfg[]) +__checkpoint_lock_dirty_tree(WT_SESSION_IMPL *session, + bool is_checkpoint, bool force, bool need_tracking, const char *cfg[]) { WT_BTREE *btree; WT_CKPT *ckpt, *ckptbase; @@ -1194,6 +1243,14 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session, ckpt->name); } + /* + * Mark old checkpoints that are being deleted and figure out which + * trees we can skip in this checkpoint. 
+ */ + WT_ERR(__checkpoint_mark_skip(session, ckptbase, force)); + if (F_ISSET(btree, WT_BTREE_SKIP_CKPT)) + goto err; + /* * Lock the checkpoints that will be deleted. * @@ -1268,23 +1325,19 @@ err: if (hot_backup_locked) } /* - * __checkpoint_mark_deletes -- - * Figure out what old checkpoints will be deleted, and whether the - * checkpoint can be skipped entirely. + * __checkpoint_mark_skip -- + * Figure out whether the checkpoint can be skipped for a tree. */ static int -__checkpoint_mark_deletes( - WT_SESSION_IMPL *session, const char *cfg[]) +__checkpoint_mark_skip( + WT_SESSION_IMPL *session, WT_CKPT *ckptbase, bool force) { WT_BTREE *btree; - WT_CKPT *ckpt, *ckptbase; - WT_CONFIG_ITEM cval; + WT_CKPT *ckpt; const char *name; int deleted; - bool force; btree = S2BT(session); - ckptbase = btree->ckpt; /* * Check for clean objects not requiring a checkpoint. @@ -1310,12 +1363,7 @@ __checkpoint_mark_deletes( * to open the checkpoint in a cursor after taking any checkpoint, which * means it must exist. 
*/ - force = false; F_CLR(btree, WT_BTREE_SKIP_CKPT); - if (!btree->modified && cfg != NULL) { - WT_RET(__wt_config_gets(session, cfg, "force", &cval)); - force = cval.val != 0; - } if (!btree->modified && !force) { deleted = 0; WT_CKPT_FOREACH(ckptbase, ckpt) @@ -1524,7 +1572,8 @@ __checkpoint_presync(WT_SESSION_IMPL *session, const char *cfg[]) WT_UNUSED(cfg); btree = S2BT(session); - WT_ASSERT(session, !btree->include_checkpoint_txn); + WT_ASSERT(session, btree->checkpoint_gen == + S2C(session)->txn_global.checkpoint_gen); btree->evict_walk_period = btree->evict_walk_saved; return (0); } @@ -1582,12 +1631,11 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_ASSERT(session, !WT_IS_METADATA(session->dhandle) || F_ISSET(session, WT_SESSION_LOCKED_METADATA)); - WT_SAVE_DHANDLE(session, - ret = __checkpoint_lock_tree(session, true, true, cfg)); - WT_RET(ret); - WT_SAVE_DHANDLE(session, - ret = __checkpoint_mark_deletes(session, cfg)); + WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_dirty_tree( + session, true, false, true, cfg)); WT_RET(ret); + if (F_ISSET(S2BT(session), WT_BTREE_SKIP_CKPT)) + return (0); return (__checkpoint_tree(session, true, cfg)); } @@ -1662,15 +1710,10 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) if (need_tracking) WT_RET(__wt_meta_track_on(session)); - WT_SAVE_DHANDLE(session, - ret = __checkpoint_lock_tree(session, false, need_tracking, NULL)); + WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_dirty_tree( + session, false, false, need_tracking, NULL)); WT_ASSERT(session, ret == 0); - if (ret == 0) { - WT_SAVE_DHANDLE(session, - ret = __checkpoint_mark_deletes(session, NULL)); - WT_ASSERT(session, ret == 0); - } - if (ret == 0) + if (ret == 0 && !F_ISSET(btree, WT_BTREE_SKIP_CKPT)) ret = __checkpoint_tree(session, false, NULL); if (need_tracking) -- cgit v1.2.1 From 1e05438f426c0c54a603f660fb7831eb2b9a523e Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Fri, 10 Mar 2017 21:01:21 +1100 Subject: WT-3207 
Report a message for conflicting forced checkpoints, rather than an error (#3326) Have test/fops handle EBUSY returns from forced checkpoints and EINVAL from bulk cursors. --- src/docs/upgrading.dox | 6 ++++++ src/txn/txn_ckpt.c | 8 +++++--- test/fops/file.c | 27 ++++++++++++++++++--------- test/fops/t.c | 5 +++++ 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index f463e6bc615..2894db0c126 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -13,6 +13,12 @@ In the 2.9.1 release we added statistics tracking handle list lock timing, we have switched that lock from a spin lock to a read-write lock, and consequently changed the statistics tracking lock related wait time. +
Forced and named checkpoint error conditions changed
+
+There are new cases where checkpoints created with an explicit name or the +"force" configuration option can return an EBUSY error. This can happen if +the checkpoint overlaps with other schema operations, for example, creating a table. +
@section version_291 Upgrading to Version 2.9.1 diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index d6f0e45c042..3eb07089b87 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -319,10 +319,12 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) * safely be skipped here. */ F_CLR(&session->txn, WT_TXN_ERROR); - if (force) - WT_RET_MSG(session, EBUSY, + if (force) { + WT_RET(__wt_msg(session, "forced or named checkpoint raced with " - "a metadata update"); + "a metadata update")); + return (EBUSY); + } __wt_verbose(session, WT_VERB_CHECKPOINT, "skipped checkpoint of %s with metadata conflict", session->dhandle->name); diff --git a/test/fops/file.c b/test/fops/file.c index ea15f1ee80d..66c23dfed3c 100644 --- a/test/fops/file.c +++ b/test/fops/file.c @@ -51,7 +51,7 @@ obj_bulk(void) if ((ret = c->close(c)) != 0) testutil_die(ret, "cursor.close"); } else if (ret != ENOENT && ret != EBUSY && ret != EINVAL) - testutil_die(ret, "session.open_cursor"); + testutil_die(ret, "session.open_cursor bulk"); } if ((ret = session->close(session, NULL)) != 0) testutil_die(ret, "session.close"); @@ -79,12 +79,17 @@ obj_bulk_unique(int force) testutil_die(ret, "session.create: %s", new_uri); __wt_yield(); - if ((ret = - session->open_cursor(session, new_uri, NULL, "bulk", &c)) != 0) - testutil_die(ret, "session.open_cursor: %s", new_uri); - - if ((ret = c->close(c)) != 0) - testutil_die(ret, "cursor.close"); + /* + * Opening a bulk cursor may have raced with a forced checkpoint + * which created a checkpoint of the empty file, and triggers an EINVAL + */ + if ((ret = session->open_cursor( + session, new_uri, NULL, "bulk", &c)) == 0) { + if ((ret = c->close(c)) != 0) + testutil_die(ret, "cursor.close"); + } else if (ret != EINVAL) + testutil_die(ret, + "session.open_cursor bulk unique: %s, new_uri"); while ((ret = session->drop( session, new_uri, force ? 
"force" : NULL)) != 0) @@ -190,9 +195,13 @@ obj_checkpoint(void) if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) testutil_die(ret, "conn.session"); - /* Force the checkpoint so it has to be taken. */ + /* + * Force the checkpoint so it has to be taken. Forced checkpoints can + * race with other metadata operations and return EBUSY - we'd expect + * applications using forced checkpoints to retry on EBUSY. + */ if ((ret = session->checkpoint(session, "force")) != 0) - if (ret != ENOENT) + if (ret != EBUSY && ret != ENOENT) testutil_die(ret, "session.checkpoint"); if ((ret = session->close(session, NULL)) != 0) diff --git a/test/fops/t.c b/test/fops/t.c index 651d22c8deb..469d5acd33a 100644 --- a/test/fops/t.c +++ b/test/fops/t.c @@ -217,6 +217,11 @@ handle_message(WT_EVENT_HANDLER *handler, (void)(handler); (void)(session); + /* Ignore messages about failing to create forced checkpoints. */ + if (strstr( + message, "forced or named checkpoint") != NULL) + return (0); + if (logfp != NULL) return (fprintf(logfp, "%s\n", message) < 0 ? -1 : 0); -- cgit v1.2.1 From f72c78b74d42c9e89bc98ad56ba184536e8efcae Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Mon, 13 Mar 2017 17:13:31 +1100 Subject: WT-3207 Fix a leak if a checkpoint fails. (#3329) Also switch to holding the schema lock when completing a bulk load. This avoids a race with checkpoints starting, so avoids a failure mode that was added to checkpoint earlier in this ticket. Assert that we don't hit that case instead. 
--- src/btree/bt_vrfy.c | 2 +- src/conn/conn_dhandle.c | 10 +++--- src/include/extern.h | 2 +- src/meta/meta_ckpt.c | 10 +++--- src/meta/meta_ext.c | 2 +- src/session/session_dhandle.c | 10 ++++++ src/session/session_salvage.c | 2 +- src/txn/txn_ckpt.c | 75 +++++++++++++++---------------------------- 8 files changed, 50 insertions(+), 63 deletions(-) diff --git a/src/btree/bt_vrfy.c b/src/btree/bt_vrfy.c index 3c90e580696..7475811adc5 100644 --- a/src/btree/bt_vrfy.c +++ b/src/btree/bt_vrfy.c @@ -274,7 +274,7 @@ err: /* Inform the underlying block manager we're done. */ /* Discard the list of checkpoints. */ if (ckptbase != NULL) - __wt_meta_ckptlist_free(session, ckptbase); + __wt_meta_ckptlist_free(session, &ckptbase); /* Free allocated memory. */ __wt_scr_free(session, &vs->max_key); diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 25795a8d309..6c8d66d63f8 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -152,11 +152,11 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) WT_RET(__wt_evict_file_exclusive_on(session)); /* - * If we don't already have the schema lock, make it an error to try - * to acquire it. The problem is that we are holding an exclusive - * lock on the handle, and if we attempt to acquire the schema lock - * we might deadlock with a thread that has the schema lock and wants - * a handle lock (specifically, checkpoint). + * If we don't already have the schema lock, make it an error to try to + * acquire it. The problem is that we are holding an exclusive lock on + * the handle, and if we attempt to acquire the schema lock we might + * deadlock with a thread that has the schema lock and wants a handle + * lock. 
*/ no_schema_lock = false; if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) { diff --git a/src/include/extern.h b/src/include/extern.h index d0c9655fafb..db718966426 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -462,7 +462,7 @@ extern int __wt_meta_checkpoint_last_name( WT_SESSION_IMPL *session, const char extern int __wt_meta_checkpoint_clear(WT_SESSION_IMPL *session, const char *fname) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_ckptlist_get( WT_SESSION_IMPL *session, const char *fname, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_ckptlist_set(WT_SESSION_IMPL *session, const char *fname, WT_CKPT *ckptbase, WT_LSN *ckptlsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT *ckptbase) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_meta_checkpoint_free(WT_SESSION_IMPL *session, WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_ext_metadata_insert(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_ext_metadata_remove( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/meta/meta_ckpt.c b/src/meta/meta_ckpt.c index b985104c2eb..151bbe0e081 100644 --- a/src/meta/meta_ckpt.c +++ b/src/meta/meta_ckpt.c @@ -297,7 +297,7 @@ __wt_meta_ckptlist_get( *ckptbasep = ckptbase; if (0) { -err: 
__wt_meta_ckptlist_free(session, ckptbase); +err: __wt_meta_ckptlist_free(session, &ckptbase); } __wt_free(session, config); __wt_scr_free(session, &buf); @@ -463,16 +463,16 @@ err: __wt_scr_free(session, &buf); * Discard the checkpoint array. */ void -__wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT *ckptbase) +__wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep) { - WT_CKPT *ckpt; + WT_CKPT *ckpt, *ckptbase; - if (ckptbase == NULL) + if ((ckptbase = *ckptbasep) == NULL) return; WT_CKPT_FOREACH(ckptbase, ckpt) __wt_meta_checkpoint_free(session, ckpt); - __wt_free(session, ckptbase); + __wt_free(session, *ckptbasep); } /* diff --git a/src/meta/meta_ext.c b/src/meta/meta_ext.c index 50e7568fe77..aa1ea8b974d 100644 --- a/src/meta/meta_ext.c +++ b/src/meta/meta_ext.c @@ -102,5 +102,5 @@ void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase) WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) { - __wt_meta_ckptlist_free((WT_SESSION_IMPL *)session, ckptbase); + __wt_meta_ckptlist_free((WT_SESSION_IMPL *)session, &ckptbase); } diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c index 7c96dd8b8a8..95fb6a6f90e 100644 --- a/src/session/session_dhandle.c +++ b/src/session/session_dhandle.c @@ -270,6 +270,16 @@ __wt_session_release_btree(WT_SESSION_IMPL *session) if (F_ISSET(dhandle, WT_DHANDLE_DISCARD_FORCE)) { ret = __wt_conn_btree_sync_and_close(session, false, true); F_CLR(dhandle, WT_DHANDLE_DISCARD_FORCE); + } else if (F_ISSET(btree, WT_BTREE_BULK)) { + WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) && + !F_ISSET(dhandle, WT_DHANDLE_DISCARD)); + /* + * Acquire the schema lock while completing a bulk load. This + * avoids racing with a checkpoint while it gathers a set + * of handles. 
+ */ + WT_WITH_SCHEMA_LOCK(session, ret = + __wt_conn_btree_sync_and_close(session, false, false)); } else if (F_ISSET(dhandle, WT_DHANDLE_DISCARD) || F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)) { WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE)); diff --git a/src/session/session_salvage.c b/src/session/session_salvage.c index 983b28dd8ea..12ce71cdbb0 100644 --- a/src/session/session_salvage.c +++ b/src/session/session_salvage.c @@ -54,6 +54,6 @@ __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__wt_meta_ckptlist_set( session, dhandle->name, ckptbase, NULL)); -err: __wt_meta_ckptlist_free(session, ckptbase); +err: __wt_meta_ckptlist_free(session, &ckptbase); return (ret); } diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 3eb07089b87..748f4aa2473 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -267,10 +267,9 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) { WT_BTREE *btree; WT_CONFIG_ITEM cval; - WT_CURSOR *meta_cursor; WT_DECL_RET; const char *name; - bool force, metadata_race; + bool force; btree = S2BT(session); @@ -291,6 +290,7 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) if (F_ISSET(btree, WT_BTREE_NO_CHECKPOINT)) return (0); +#ifdef HAVE_DIAGNOSTIC /* * We may have raced between starting the checkpoint transaction and * some operation completing on the handle that updated the metadata @@ -298,10 +298,12 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) * exclusive access to the handle or hold the schema lock. We are now * holding the schema lock and have an open btree handle, so if we * can't update the metadata, then there has been some state change - * invisible to the checkpoint transaction. Skip checkpointing such - * files: they must have a recent durable point. + * invisible to the checkpoint transaction. 
*/ if (!WT_IS_METADATA(session->dhandle)) { + WT_CURSOR *meta_cursor; + bool metadata_race; + WT_ASSERT(session, !F_ISSET(&session->txn, WT_TXN_ERROR)); WT_RET(__wt_metadata_cursor(session, &meta_cursor)); meta_cursor->set_key(meta_cursor, session->dhandle->name); @@ -313,26 +315,9 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) metadata_race = false; WT_TRET(__wt_metadata_cursor_release(session, &meta_cursor)); WT_RET(ret); - if (metadata_race) { - /* - * The conflict registers as a rollback error: that can - * safely be skipped here. - */ - F_CLR(&session->txn, WT_TXN_ERROR); - if (force) { - WT_RET(__wt_msg(session, - "forced or named checkpoint raced with " - "a metadata update")); - return (EBUSY); - } - __wt_verbose(session, WT_VERB_CHECKPOINT, - "skipped checkpoint of %s with metadata conflict", - session->dhandle->name); - F_SET(btree, WT_BTREE_SKIP_CKPT); - __checkpoint_update_generation(session); - return (0); - } + WT_ASSERT(session, !metadata_race); } +#endif /* * Decide whether the tree needs to be included in the checkpoint and @@ -342,6 +327,7 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) session, true, force, true, cfg)); WT_RET(ret); if (F_ISSET(btree, WT_BTREE_SKIP_CKPT)) { + WT_ASSERT(session, btree->ckpt == NULL); __checkpoint_update_generation(session); return (0); } @@ -567,8 +553,11 @@ __checkpoint_verbose_track(WT_SESSION_IMPL *session, static void __checkpoint_fail_reset(WT_SESSION_IMPL *session) { - S2BT(session)->modified = true; - S2BT(session)->ckpt = NULL; + WT_BTREE *btree; + + btree = S2BT(session); + btree->modified = true; + __wt_meta_ckptlist_free(session, &btree->ckpt); } /* @@ -600,6 +589,8 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, const char *cfg[]) */ WT_RET(__wt_txn_begin(session, txn_cfg)); + WT_DIAGNOSTIC_YIELD; + /* Ensure a transaction ID is allocated prior to sharing it globally */ WT_RET(__wt_txn_id_check(session)); @@ -1286,33 +1277,20 @@ 
__checkpoint_lock_dirty_tree(WT_SESSION_IMPL *session, } /* - * There are special files: those being bulk-loaded, salvaged, upgraded - * or verified during the checkpoint. We have to do something for those - * objects because a checkpoint is an external name the application can - * reference and the name must exist no matter what's happening during - * the checkpoint. For bulk-loaded files, we could block until the load - * completes, checkpoint the partial load, or magic up an empty-file - * checkpoint. The first is too slow, the second is insane, so do the - * third. - * Salvage, upgrade and verify don't currently require any work, all - * three hold the schema lock, blocking checkpoints. If we ever want to - * fix that (and I bet we eventually will, at least for verify), we can - * copy the last checkpoint the file has. That works if we guarantee - * salvage, upgrade and verify act on objects with previous checkpoints - * (true if handles are closed/re-opened between object creation and a - * subsequent salvage, upgrade or verify operation). Presumably, - * salvage and upgrade will discard all previous checkpoints when they - * complete, which is fine with us. This change will require reference - * counting checkpoints, and once that's done, we should use checkpoint - * copy instead of forcing checkpoints on clean objects to associate - * names with checkpoints. + * There are special trees: those being bulk-loaded, salvaged, upgraded + * or verified during the checkpoint. They should never be part of a + * checkpoint: we will fail to lock them because the operations have + * exclusive access to the handles. Named checkpoints will fail in that + * case, ordinary checkpoints will skip files that cannot be opened + * normally.
*/ WT_ASSERT(session, !is_checkpoint || !F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)); __wt_readunlock(session, &conn->hot_backup_lock); - WT_ASSERT(session, btree->ckpt == NULL); + WT_ASSERT(session, btree->ckpt == NULL && + !F_ISSET(btree, WT_BTREE_SKIP_CKPT)); btree->ckpt = ckptbase; return (0); @@ -1320,7 +1298,7 @@ __checkpoint_lock_dirty_tree(WT_SESSION_IMPL *session, err: if (hot_backup_locked) __wt_readunlock(session, &conn->hot_backup_lock); - __wt_meta_ckptlist_free(session, ckptbase); + __wt_meta_ckptlist_free(session, &ckptbase); __wt_free(session, name_alloc); return (ret); @@ -1554,8 +1532,7 @@ err: /* S2C(session)->modified = true; } - __wt_meta_ckptlist_free(session, ckptbase); - btree->ckpt = NULL; + __wt_meta_ckptlist_free(session, &btree->ckpt); return (ret); } -- cgit v1.2.1 From 28a883f7b4acd020a8d92a733cb9df415a6be482 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 13 Mar 2017 20:53:11 -0400 Subject: WT-3211 WT_CURSOR.remove cannot always retain its position. (#3321) --- .gitignore | 30 +++-- SConstruct | 1 - dist/s_string.ok | 1 + dist/s_void | 4 +- examples/c/Makefile.am | 1 - examples/c/ex_scope.c | 217 ------------------------------ src/btree/bt_cursor.c | 312 +++++++++++++++++++++++++++++++------------- src/btree/bt_ret.c | 152 ++++++++++++--------- src/cursor/cur_file.c | 23 ++-- src/cursor/cur_table.c | 30 ++++- src/docs/cursor-ops.dox | 4 - src/docs/upgrading.dox | 30 ++++- src/include/api.h | 37 +++--- src/include/buf.i | 37 +++--- src/include/cursor.i | 35 ++--- src/include/error.h | 6 +- src/include/extern.h | 1 + src/include/misc.h | 1 + src/include/session.h | 1 - src/include/wiredtiger.in | 30 +++-- src/lsm/lsm_cursor.c | 42 +++++- src/txn/txn_ckpt.c | 1 - test/csuite/Makefile.am | 7 +- test/csuite/scope/main.c | 288 ++++++++++++++++++++++++++++++++++++++++ test/suite/test_cursor10.py | 4 +- test/suite/test_cursor11.py | 159 ++++++++++++++++++++++ 26 files changed, 960 insertions(+), 494 deletions(-) delete mode 100644 
examples/c/ex_scope.c create mode 100644 test/csuite/scope/main.c create mode 100644 test/suite/test_cursor11.py diff --git a/.gitignore b/.gitignore index c7b3ade9e87..4611f2aa98c 100644 --- a/.gitignore +++ b/.gitignore @@ -90,24 +90,28 @@ _wiredtiger.pyd **/examples/c/ex_pack **/examples/c/ex_process **/examples/c/ex_schema -**/examples/c/ex_scope **/examples/c/ex_stat **/examples/c/ex_sync **/examples/c/ex_thread **/test/bloom/t **/test/checkpoint/t -**/test/csuite/test_wt1965_col_efficiency -**/test/csuite/test_wt2246_col_append -**/test/csuite/test_wt2323_join_visibility -**/test/csuite/test_wt2403_lsm_workload -**/test/csuite/test_wt2447_join_main_table -**/test/csuite/test_wt2535_insert_race -**/test/csuite/test_wt2592_join_schema -**/test/csuite/test_wt2695_checksum -**/test/csuite/test_wt2719_reconfig -**/test/csuite/test_wt2834_join_bloom_fix -**/test/csuite/test_wt2853_perf -**/test/csuite/test_wt2999_join_extractor +**/test_scope +**/test_wt1965_col_efficiency +**/test_wt2246_col_append +**/test_wt2323_join_visibility +**/test_wt2403_lsm_workload +**/test_wt2447_join_main_table +**/test_wt2535_insert_race +**/test_wt2592_join_schema +**/test_wt2695_checksum +**/test_wt2719_reconfig +**/test_wt2834_join_bloom_fix +**/test_wt2853_perf +**/test_wt2909_checkpoint_integrity +**/test_wt2999_join_extractor +**/test_wt3120_filesys +**/test_wt3135_search_near_collator +**/test_wt3184_dup_index_collator **/test/cursor_order/cursor_order **/test/fops/t **/test/format/s_dumpcmp diff --git a/SConstruct b/SConstruct index e9e72630b11..b397f662be7 100644 --- a/SConstruct +++ b/SConstruct @@ -376,7 +376,6 @@ examples = [ "ex_pack", "ex_process", "ex_schema", - "ex_scope", "ex_stat", "ex_thread", ] diff --git a/dist/s_string.ok b/dist/s_string.ok index e033f77327f..cdfa4aec968 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -63,6 +63,7 @@ CPUs CRC CSV CStream +CURFILE CURSORs CURSTD CallsCustDate diff --git a/dist/s_void b/dist/s_void index 
90425d5a718..249f043d029 100755 --- a/dist/s_void +++ b/dist/s_void @@ -137,7 +137,7 @@ for f in `find bench ext src test -name '*.[ci]'`; do # form of return assignment or call. file_parse $f | sed -e 's/return ([^)]*); }$//' \ - -e '/[A-Z]*_API_CALL[A-Z_]*(/d' \ + -e '/[_A-Z]*_API_CALL[_A-Z]*(/d' \ -e '/WT_CURSOR_NEEDKEY(/d' \ -e '/WT_CURSOR_NEEDVALUE(/d' \ -e '/WT_ERR[A-Z_]*(/d' \ @@ -166,7 +166,7 @@ for f in `find bench ext src test -name '*.[ci]'`; do file_parse $f | grep 'WT_DECL_RET' | sed -e '/ret =/d' \ - -e '/API_END_RET/d' \ + -e '/[_A-Z]*_API_CALL[_A-Z]*(/d' \ -e '/WT_CURSOR_NEEDKEY/d' \ -e '/WT_CURSOR_NEEDVALUE/d' \ -e '/WT_ERR/d' \ diff --git a/examples/c/Makefile.am b/examples/c/Makefile.am index d5305eec5c8..20936661e06 100644 --- a/examples/c/Makefile.am +++ b/examples/c/Makefile.am @@ -20,7 +20,6 @@ noinst_PROGRAMS = \ ex_pack \ ex_process \ ex_schema \ - ex_scope \ ex_stat \ ex_sync \ ex_thread diff --git a/examples/c/ex_scope.c b/examples/c/ex_scope.c deleted file mode 100644 index 795ad85d57b..00000000000 --- a/examples/c/ex_scope.c +++ /dev/null @@ -1,217 +0,0 @@ -/*- - * Public Domain 2014-2016 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. - * - * This is free and unencumbered software released into the public domain. - * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. - * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * ex_scope.c - * demonstrates the scope of buffers holding cursor keys and values. - */ -#include -#include -#include -#include - -#include - -#ifdef _WIN32 -/* snprintf is not supported on <= VS2013 */ -#define snprintf _snprintf -#endif - -static const char *home; - -static int -cursor_scope_ops(WT_CURSOR *cursor) -{ - struct { - const char *op; - const char *key; - const char *value; - int (*apply)(WT_CURSOR *); - } *op, ops[] = { - { "insert", "key1", "value1", cursor->insert, }, - { "update", "key1", "value2", cursor->update, }, - { "search", "key1", "value2", cursor->search, }, - { "remove", "key1", "value2", cursor->remove, }, - { NULL, NULL, NULL, NULL } - }; - WT_SESSION *session; - const char *key, *value; - char keybuf[10], valuebuf[10]; - int ret; - - session = cursor->session; - - for (op = ops; op->key != NULL; op++) { - key = value = NULL; - - /*! [cursor scope operation] */ - (void)snprintf(keybuf, sizeof(keybuf), "%s", op->key); - cursor->set_key(cursor, keybuf); - (void)snprintf(valuebuf, sizeof(valuebuf), "%s", op->value); - cursor->set_value(cursor, valuebuf); - - /* - * The application must keep key and value memory valid until - * the next operation that positions the cursor, modifies the - * data, or resets or closes the cursor. - * - * Modifying either the key or value buffers is not permitted. - */ - - /* Apply the operation (insert, update, search or remove). 
*/ - if ((ret = op->apply(cursor)) != 0) { - fprintf(stderr, - "%s: error performing the operation: %s\n", - op->op, session->strerror(session, ret)); - return (ret); - } - - /* - * The cursor no longer references application memory, so - * application buffers can be safely overwritten. - */ - strcpy(keybuf, "no key"); - strcpy(valuebuf, "no value"); - - /* - * Check that get_key/value behave as expected after the - * operation. - */ - if (op->apply == cursor->insert) { - /* - * WT_CURSOR::insert no longer references application - * memory, but as it does not position the cursor, it - * doesn't reference memory owned by the cursor, either. - */ - printf("ex_scope: " - "expect two WiredTiger error messages:\n"); - if ((ret = cursor->get_key(cursor, &key)) == 0 || - (ret = cursor->get_value(cursor, &value)) == 0) { - fprintf(stderr, - "%s: error in get_key/value: %s\n", - op->op, session->strerror(session, ret)); - return (ret); - } - continue; - } - if (op->apply == cursor->remove) { - /* - * WT_CURSOR::remove no longer references application - * memory; as it does not position the cursor, it will - * reference key memory owned by the cursor, but has no - * value. - */ - printf("ex_scope: " - "expect one WiredTiger error message:\n"); - if ((ret = cursor->get_key(cursor, &key)) != 0 || - (ret = cursor->get_value(cursor, &value)) == 0) { - fprintf(stderr, - "%s: error in get_key/value: %s\n", - op->op, session->strerror(session, ret)); - return (ret); - } - } else /* search, update */{ - /* - * WT_CURSOR::search and WT_CURSOR::update no longer - * reference application memory; as they position the - * cursor, they will reference key/value memory owned - * by the cursor. 
- */ - if ((ret = cursor->get_key(cursor, &key)) != 0 || - (ret = cursor->get_value(cursor, &value)) != 0) { - fprintf(stderr, - "%s: error in get_key/value: %s\n", - op->op, session->strerror(session, ret)); - return (ret); - } - } - - /* - * Modifying the memory referenced by either key or value is - * not permitted. - * - * Check that the cursor's key and value are what we expect. - */ - if (key == keybuf || - (op->apply != cursor->remove && value == valuebuf)) { - fprintf(stderr, - "%s: cursor points at application memory!\n", - op->op); - return (EINVAL); - } - - if (strcmp(key, op->key) != 0 || - (op->apply != cursor->remove && - strcmp(value, op->value) != 0)) { - fprintf(stderr, - "%s: unexpected key / value!\n", op->op); - return (EINVAL); - } - /*! [cursor scope operation] */ - } - - return (0); -} - -int -main(void) -{ - WT_CONNECTION *conn; - WT_CURSOR *cursor; - WT_SESSION *session; - int ret; - - /* - * Create a clean test directory for this run of the test program if the - * environment variable isn't already set (as is done by make check). - */ - if (getenv("WIREDTIGER_HOME") == NULL) { - home = "WT_HOME"; - ret = system("rm -rf WT_HOME && mkdir WT_HOME"); - } else - home = NULL; - - /* Open a connection, create a simple table, open a cursor. */ - if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0 || - (ret = conn->open_session(conn, NULL, NULL, &session)) != 0) { - fprintf(stderr, "Error connecting to %s: %s\n", - home == NULL ? "." : home, wiredtiger_strerror(ret)); - return (EXIT_FAILURE); - } - - ret = session->create(session, - "table:scope", "key_format=S,value_format=S,columns=(k,v)"); - - ret = session->open_cursor(session, - "table:scope", NULL, NULL, &cursor); - - ret = cursor_scope_ops(cursor); - - /* Close the connection and clean up. */ - ret = conn->close(conn, NULL); - - return (ret == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE); -} diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 4634059589b..3ae6e022906 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -8,6 +8,70 @@ #include "wt_internal.h" +/* + * WT_CURFILE_OP_XXX + * If we're going to return an error, we need to restore the cursor to + * a valid state, the upper-level cursor code is likely to retry. The macros + * here are called to save and restore that state. + */ +#define WT_CURFILE_OP_DECL \ + WT_ITEM __key_copy; \ + WT_ITEM __value_copy; \ + uint64_t __recno; \ + uint32_t __flags +#define WT_CURFILE_OP_PUSH do { \ + WT_ITEM_SET(__key_copy, cursor->key); \ + WT_ITEM_SET(__value_copy, cursor->value); \ + __recno = cursor->recno; \ + __flags = cursor->flags; \ +} while (0) +#define WT_CURFILE_OP_POP do { \ + cursor->recno = __recno; \ + if (FLD_ISSET(__flags, WT_CURSTD_KEY_EXT)) \ + WT_ITEM_SET(cursor->key, __key_copy); \ + if (FLD_ISSET(__flags, WT_CURSTD_VALUE_EXT)) \ + WT_ITEM_SET(cursor->value, __value_copy); \ + F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); \ + F_SET(cursor, \ + FLD_MASK(__flags, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT));\ +} while (0) + +/* + * __cursor_page_pinned -- + * Return if we have a page pinned and it's not been flagged for forced + * eviction (the forced eviction test is so we periodically release pages + * grown too large). + */ +static inline bool +__cursor_page_pinned(WT_CURSOR_BTREE *cbt) +{ + return (F_ISSET(cbt, WT_CBT_ACTIVE) && + cbt->ref->page->read_gen != WT_READGEN_OLDEST); +} + +/* + * __cursor_copy_int_key -- + * If we're pointing into the tree, save the key into local memory. + */ +static inline int +__cursor_copy_int_key(WT_CURSOR *cursor) +{ + /* + * We're about to discard the cursor's position and the cursor layer + * might retry the operation. We discard pinned pages on error, which + * will invalidate pinned keys. 
Clear WT_CURSTD_KEY_INT in all cases, + * the underlying page is gone whether we can allocate memory or not. + */ + if (F_ISSET(cursor, WT_CURSTD_KEY_INT)) { + F_CLR(cursor, WT_CURSTD_KEY_INT); + if (!WT_DATA_IN_ITEM(&cursor->key)) + WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session, + &cursor->key, cursor->key.data, cursor->key.size)); + F_SET(cursor, WT_CURSTD_KEY_EXT); + } + return (0); +} + /* * __cursor_size_chk -- * Return if an inserted item is too large. @@ -343,8 +407,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) * from the root. */ valid = false; - if (F_ISSET(cbt, WT_CBT_ACTIVE) && - cbt->ref->page->read_gen != WT_READGEN_OLDEST) { + if (__cursor_page_pinned(cbt)) { __wt_txn_cursor_op(session); WT_ERR(btree->type == BTREE_ROW ? @@ -422,9 +485,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) * existing record. */ valid = false; - if (btree->type == BTREE_ROW && - F_ISSET(cbt, WT_CBT_ACTIVE) && - cbt->ref->page->read_gen != WT_READGEN_OLDEST) { + if (btree->type == BTREE_ROW && __cursor_page_pinned(cbt)) { __wt_txn_cursor_op(session); WT_ERR(__cursor_row_search(session, cbt, cbt->ref, true)); @@ -531,9 +592,18 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) retry: WT_RET(__cursor_func_init(cbt, true)); - switch (btree->type) { - case BTREE_COL_FIX: - case BTREE_COL_VAR: + if (btree->type == BTREE_ROW) { + WT_ERR(__cursor_row_search(session, cbt, NULL, true)); + /* + * If not overwriting, fail if the key exists, else insert the + * key/value pair. + */ + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && + cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) + WT_ERR(WT_DUPLICATE_KEY); + + ret = __cursor_row_modify(session, cbt, false); + } else { /* * If WT_CURSTD_APPEND is set, insert a new record (ignoring * the application's record number). 
The real record number @@ -558,19 +628,6 @@ retry: WT_RET(__cursor_func_init(cbt, true)); WT_ERR(__cursor_col_modify(session, cbt, false)); if (F_ISSET(cursor, WT_CURSTD_APPEND)) cbt->iface.recno = cbt->recno; - break; - case BTREE_ROW: - WT_ERR(__cursor_row_search(session, cbt, NULL, true)); - /* - * If not overwriting, fail if the key exists, else insert the - * key/value pair. - */ - if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && - cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) - WT_ERR(WT_DUPLICATE_KEY); - - ret = __cursor_row_modify(session, cbt, false); - break; } err: if (ret == WT_RESTART) { @@ -578,11 +635,9 @@ err: if (ret == WT_RESTART) { WT_STAT_DATA_INCR(session, cursor_restart); goto retry; } + /* Insert doesn't maintain a position across calls, clear resources. */ - if (ret == 0) - WT_TRET(__curfile_leave(cbt)); - if (ret != 0) - WT_TRET(__cursor_reset(cbt)); + WT_TRET(__cursor_reset(cbt)); return (ret); } @@ -640,29 +695,24 @@ __wt_btcur_update_check(WT_CURSOR_BTREE *cbt) retry: WT_RET(__cursor_func_init(cbt, true)); - switch (btree->type) { - case BTREE_ROW: + if (btree->type == BTREE_ROW) { WT_ERR(__cursor_row_search(session, cbt, NULL, true)); /* * Just check for conflicts. */ ret = __curfile_update_check(cbt); - break; - case BTREE_COL_FIX: - case BTREE_COL_VAR: + } else WT_ERR(__wt_illegal_value(session, NULL)); - break; - } err: if (ret == WT_RESTART) { WT_STAT_CONN_INCR(session, cursor_restart); WT_STAT_DATA_INCR(session, cursor_restart); goto retry; } - WT_TRET(__curfile_leave(cbt)); - if (ret != 0) - WT_TRET(__cursor_reset(cbt)); + + /* Insert doesn't maintain a position across calls, clear resources. 
*/ + WT_TRET(__cursor_reset(cbt)); return (ret); } @@ -674,23 +724,83 @@ int __wt_btcur_remove(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURFILE_OP_DECL; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; + bool positioned; btree = cbt->btree; cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; + WT_CURFILE_OP_PUSH; + WT_STAT_CONN_INCR(session, cursor_remove); WT_STAT_DATA_INCR(session, cursor_remove); WT_STAT_DATA_INCRV(session, cursor_remove_bytes, cursor->key.size); -retry: WT_RET(__cursor_func_init(cbt, true)); + /* + * WT_CURSOR.remove has a unique semantic, the cursor stays positioned + * if it starts positioned, otherwise clear the cursor on completion. + */ + positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT); - switch (btree->type) { - case BTREE_COL_FIX: - case BTREE_COL_VAR: +retry: + /* + * If removing with overwrite configured, and positioned to an on-page + * key, the update doesn't require another search. The cursor won't be + * positioned on a page with an external key set, but be sure. + */ + if (__cursor_page_pinned(cbt) && + F_ISSET_ALL(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_OVERWRITE)) { + WT_ERR(__wt_txn_autocommit_check(session)); + + /* + * The cursor position may not be exact (the cursor's comparison + * value not equal to zero). Correct to an exact match so we can + * remove whatever we're pointing at. + */ + cbt->compare = 0; + ret = btree->type == BTREE_ROW ? + __cursor_row_modify(session, cbt, true) : + __cursor_col_modify(session, cbt, true); + + /* + * The pinned page goes away if we fail for any reason, make + * sure there's a local copy of any key. (Restart could still + * use the pinned page, but that's an unlikely path.) Re-save + * the cursor state: we may retry but eventually fail. + */ + if (ret != 0) { + WT_TRET(__cursor_copy_int_key(cursor)); + WT_CURFILE_OP_PUSH; + goto err; + } + goto done; + } + + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key. 
Re-save the cursor state: we may retry but + * eventually fail. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + WT_CURFILE_OP_PUSH; + + WT_ERR(__cursor_func_init(cbt, true)); + + if (btree->type == BTREE_ROW) { + WT_ERR(__cursor_row_search(session, cbt, NULL, false)); + + /* Check whether an update would conflict. */ + WT_ERR(__curfile_update_check(cbt)); + + if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) + WT_ERR(WT_NOTFOUND); + + ret = __cursor_row_modify(session, cbt, true); + } else { WT_ERR(__cursor_col_search(session, cbt, NULL)); /* @@ -717,19 +827,6 @@ retry: WT_RET(__cursor_func_init(cbt, true)); cbt->recno = cursor->recno; } else ret = __cursor_col_modify(session, cbt, true); - break; - case BTREE_ROW: - /* Remove the record if it exists. */ - WT_ERR(__cursor_row_search(session, cbt, NULL, false)); - - /* Check whether an update would conflict. */ - WT_ERR(__curfile_update_check(cbt)); - - if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) - WT_ERR(WT_NOTFOUND); - - ret = __cursor_row_modify(session, cbt, true); - break; } err: if (ret == WT_RESTART) { @@ -737,15 +834,29 @@ err: if (ret == WT_RESTART) { WT_STAT_DATA_INCR(session, cursor_restart); goto retry; } + /* - * If the cursor is configured to overwrite and the record is not - * found, that is exactly what we want. + * If the cursor is configured to overwrite and the record is not found, + * that is exactly what we want, return success. */ if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) && ret == WT_NOTFOUND) ret = 0; - if (ret != 0) +done: /* + * If the cursor was positioned, it stays positioned, point the cursor + * at an internal copy of the key. Otherwise, there's no position or + * key/value. 
+ */ + if (ret == 0) + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + if (ret == 0 && positioned) { + WT_TRET(__wt_key_return(session, cbt)); + if (ret == 0) + F_SET(cursor, WT_CURSTD_KEY_INT); + } else WT_TRET(__cursor_reset(cbt)); + if (ret != 0) + WT_CURFILE_OP_POP; return (ret); } @@ -779,9 +890,19 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt) retry: WT_RET(__cursor_func_init(cbt, true)); - switch (btree->type) { - case BTREE_COL_FIX: - case BTREE_COL_VAR: + if (btree->type == BTREE_ROW) { + WT_ERR(__cursor_row_search(session, cbt, NULL, true)); + /* + * If not overwriting, check for conflicts and fail if the key + * does not exist. + */ + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { + WT_ERR(__curfile_update_check(cbt)); + if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) + WT_ERR(WT_NOTFOUND); + } + ret = __cursor_row_modify(session, cbt, false); + } else { WT_ERR(__cursor_col_search(session, cbt, NULL)); /* @@ -800,20 +921,6 @@ retry: WT_RET(__cursor_func_init(cbt, true)); WT_ERR(WT_NOTFOUND); } ret = __cursor_col_modify(session, cbt, false); - break; - case BTREE_ROW: - WT_ERR(__cursor_row_search(session, cbt, NULL, true)); - /* - * If not overwriting, check for conflicts and fail if the key - * does not exist. - */ - if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { - WT_ERR(__curfile_update_check(cbt)); - if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) - WT_ERR(WT_NOTFOUND); - } - ret = __cursor_row_modify(session, cbt, false); - break; } err: if (ret == WT_RESTART) { @@ -963,9 +1070,12 @@ __cursor_truncate(WT_SESSION_IMPL *session, WT_DECL_RET; /* - * First, call the standard cursor remove method to do a full search and - * re-position the cursor because we don't have a saved copy of the - * page's write generation information, which we need to remove records. 
+ * First, call the cursor search method to re-position the cursor: we + * may not have a cursor position (if the higher-level truncate code + * switched the cursors to have an "external" cursor key, and because + * we don't save a copy of the page's write generation information, + * which we need to remove records. + * * Once that's done, we can delete records without a full search, unless * we encounter a restart error because the page was modified by some * other thread of control; in that case, repeat the full search to @@ -978,20 +1088,31 @@ __cursor_truncate(WT_SESSION_IMPL *session, * instantiated the end cursor, so we know that page is pinned in memory * and we can proceed without concern. */ -retry: WT_RET(__wt_btcur_remove(start)); +retry: WT_RET(__wt_btcur_search(start)); + + /* + * XXX KEITH + * When the btree cursor code sets/clears the cursor flags (rather than + * the cursor layer), the set/clear goes away, only the assert remains. + */ + F_CLR((WT_CURSOR *)start, WT_CURSTD_KEY_SET); + F_SET((WT_CURSOR *)start, WT_CURSTD_KEY_INT); + WT_ASSERT(session, + F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); /* * Reset ret each time through so that we don't loop forever in * the cursor equals case. */ for (ret = 0;;) { + if ((ret = rmfunc(session, start, 1)) != 0) + break; + if (stop != NULL && __cursor_equals(start, stop)) break; if ((ret = __wt_btcur_next(start, true)) != 0) break; - start->compare = 0; /* Exact match */ - if ((ret = rmfunc(session, start, 1)) != 0) - break; + start->compare = 0; /* Exact match */ } if (ret == WT_RESTART) { @@ -1024,29 +1145,44 @@ __cursor_truncate_fix(WT_SESSION_IMPL *session, * record 37, records 1-36 magically appear. Those records can't be * deleted, which means we have to ignore already "deleted" records. 
* - * First, call the standard cursor remove method to do a full search and - * re-position the cursor because we don't have a saved copy of the - * page's write generation information, which we need to remove records. + * First, call the cursor search method to re-position the cursor: we + * may not have a cursor position (if the higher-level truncate code + * switched the cursors to have an "external" cursor key, and because + * we don't save a copy of the page's write generation information, + * which we need to remove records. + * * Once that's done, we can delete records without a full search, unless * we encounter a restart error because the page was modified by some * other thread of control; in that case, repeat the full search to * refresh the page's modification information. */ -retry: WT_RET(__wt_btcur_remove(start)); +retry: WT_RET(__wt_btcur_search(start)); + + /* + * XXX KEITH + * When the btree cursor code sets/clears the cursor flags (rather than + * the cursor layer), the set/clear goes away, only the assert remains. + */ + F_CLR((WT_CURSOR *)start, WT_CURSTD_KEY_SET); + F_SET((WT_CURSOR *)start, WT_CURSTD_KEY_INT); + WT_ASSERT(session, + F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); + /* * Reset ret each time through so that we don't loop forever in * the cursor equals case. */ for (ret = 0;;) { + value = (const uint8_t *)start->iface.value.data; + if (*value != 0 && + (ret = rmfunc(session, start, 1)) != 0) + break; + if (stop != NULL && __cursor_equals(start, stop)) break; if ((ret = __wt_btcur_next(start, true)) != 0) break; start->compare = 0; /* Exact match */ - value = (const uint8_t *)start->iface.value.data; - if (*value != 0 && - (ret = rmfunc(session, start, 1)) != 0) - break; } if (ret == WT_RESTART) { @@ -1166,7 +1302,7 @@ __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel) * Skip the usual cursor tear-down in that case. 
*/ if (!lowlevel) - ret = __curfile_leave(cbt); + ret = __cursor_reset(cbt); __wt_buf_free(session, &cbt->_row_key); __wt_buf_free(session, &cbt->_tmp); diff --git a/src/btree/bt_ret.c b/src/btree/bt_ret.c index 6409a1a180c..9fc457e2297 100644 --- a/src/btree/bt_ret.c +++ b/src/btree/bt_ret.c @@ -9,64 +9,21 @@ #include "wt_internal.h" /* - * __wt_kv_return -- - * Return a page referenced key/value pair to the application. + * __key_return -- + * Change the cursor to reference an internal return key. */ -int -__wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) +static inline int +__key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) { - WT_BTREE *btree; - WT_CELL *cell; - WT_CELL_UNPACK unpack; WT_CURSOR *cursor; WT_ITEM *tmp; WT_PAGE *page; WT_ROW *rip; - uint8_t v; - - btree = S2BT(session); page = cbt->ref->page; cursor = &cbt->iface; - switch (page->type) { - case WT_PAGE_COL_FIX: - /* - * The interface cursor's record has usually been set, but that - * isn't universally true, specifically, cursor.search_near may - * call here without first setting the interface cursor. - */ - cursor->recno = cbt->recno; - - /* If the cursor references a WT_UPDATE item, return it. */ - if (upd != NULL) { - cursor->value.data = WT_UPDATE_DATA(upd); - cursor->value.size = upd->size; - return (0); - } - - /* Take the value from the original page. */ - v = __bit_getv_recno(cbt->ref, cursor->recno, btree->bitcnt); - return (__wt_buf_set(session, &cursor->value, &v, 1)); - case WT_PAGE_COL_VAR: - /* - * The interface cursor's record has usually been set, but that - * isn't universally true, specifically, cursor.search_near may - * call here without first setting the interface cursor. - */ - cursor->recno = cbt->recno; - - /* If the cursor references a WT_UPDATE item, return it. */ - if (upd != NULL) { - cursor->value.data = WT_UPDATE_DATA(upd); - cursor->value.size = upd->size; - return (0); - } - - /* Take the value from the original page cell. 
*/ - cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]); - break; - case WT_PAGE_ROW_LEAF: + if (page->type == WT_PAGE_ROW_LEAF) { rip = &page->pg_row[cbt->slot]; /* @@ -79,7 +36,10 @@ __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) if (cbt->ins != NULL) { cursor->key.data = WT_INSERT_KEY(cbt->ins); cursor->key.size = WT_INSERT_KEY_SIZE(cbt->ins); - } else if (cbt->compare == 0) { + return (0); + } + + if (cbt->compare == 0) { /* * If not in an insert list and there's an exact match, * the row-store search function built the key we want @@ -97,16 +57,51 @@ __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) cursor->key.data = cbt->row_key->data; cursor->key.size = cbt->row_key->size; - } else - WT_RET(__wt_row_leaf_key( - session, page, rip, &cursor->key, false)); - - /* If the cursor references a WT_UPDATE item, return it. */ - if (upd != NULL) { - cursor->value.data = WT_UPDATE_DATA(upd); - cursor->value.size = upd->size; return (0); } + return (__wt_row_leaf_key( + session, page, rip, &cursor->key, false)); + } + + /* + * WT_PAGE_COL_FIX, WT_PAGE_COL_VAR: + * The interface cursor's record has usually been set, but that + * isn't universally true, specifically, cursor.search_near may call + * here without first setting the interface cursor. + */ + cursor->recno = cbt->recno; + return (0); +} + +/* + * __value_return -- + * Change the cursor to reference an internal return value. + */ +static inline int +__value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) +{ + WT_BTREE *btree; + WT_CELL *cell; + WT_CELL_UNPACK unpack; + WT_CURSOR *cursor; + WT_PAGE *page; + WT_ROW *rip; + uint8_t v; + + btree = S2BT(session); + + page = cbt->ref->page; + cursor = &cbt->iface; + + /* If the cursor references a WT_UPDATE item, return it. 
*/ + if (upd != NULL) { + cursor->value.data = WT_UPDATE_DATA(upd); + cursor->value.size = upd->size; + return (0); + } + + if (page->type == WT_PAGE_ROW_LEAF) { + rip = &page->pg_row[cbt->slot]; /* Simple values have their location encoded in the WT_ROW. */ if (__wt_row_leaf_value(page, rip, &cursor->value)) @@ -121,13 +116,46 @@ __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) cursor->value.size = 0; return (0); } - break; - WT_ILLEGAL_VALUE(session); + __wt_cell_unpack(cell, &unpack); + return (__wt_page_cell_data_ref( + session, page, &unpack, &cursor->value)); + + } + + if (page->type == WT_PAGE_COL_VAR) { + /* Take the value from the original page cell. */ + cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]); + __wt_cell_unpack(cell, &unpack); + return (__wt_page_cell_data_ref( + session, page, &unpack, &cursor->value)); } - /* The value is an on-page cell, unpack and expand it as necessary. */ - __wt_cell_unpack(cell, &unpack); - WT_RET(__wt_page_cell_data_ref(session, page, &unpack, &cursor->value)); + /* WT_PAGE_COL_FIX: Take the value from the original page. */ + v = __bit_getv_recno(cbt->ref, cursor->recno, btree->bitcnt); + return (__wt_buf_set(session, &cursor->value, &v, 1)); +} + +/* + * __wt_key_return -- + * Change the cursor to reference an internal return key. + */ +int +__wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) +{ + WT_RET(__key_return(session, cbt)); + + return (0); +} + +/* + * __wt_kv_return -- + * Return a page referenced key/value pair to the application. 
+ */ +int +__wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) +{ + WT_RET(__wt_key_return(session, cbt)); + WT_RET(__value_return(session, cbt, upd)); return (0); } diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index 0ec917fbf95..274dc1e8f62 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -325,24 +325,21 @@ __curfile_remove(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_REMOVE_API_CALL(cursor, session, cbt->btree); - WT_CURSOR_NEEDKEY(cursor); + WT_CURSOR_CHECKKEY(cursor); WT_CURSOR_NOVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, __wt_btcur_remove(cbt), ret); + WT_ERR(__wt_btcur_remove(cbt)); /* - * After a successful remove, copy the key: the value is not available. + * Remove with a search-key is fire-and-forget, no position and no key. + * Remove starting from a position maintains the position and a key. + * We don't know which it was at this layer, so can only assert the key + * is not set at all, or internal. There's never a value. 
*/ - if (ret == 0) { - if (F_ISSET(cursor, WT_CURSTD_KEY_INT) && - !WT_DATA_IN_ITEM(&(cursor)->key)) { - WT_ERR(__wt_buf_set(session, &cursor->key, - cursor->key.data, cursor->key.size)); - F_CLR(cursor, WT_CURSTD_KEY_INT); - F_SET(cursor, WT_CURSTD_KEY_EXT); - } - F_CLR(cursor, WT_CURSTD_VALUE_SET); - } + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == 0 || + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); + WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0); err: CURSOR_UPDATE_API_END(session, ret); return (ret); diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c index 7e8cd153d2d..98dbbec8981 100644 --- a/src/cursor/cur_table.c +++ b/src/cursor/cur_table.c @@ -511,9 +511,16 @@ __curtable_insert(WT_CURSOR *cursor) */ F_SET(primary, flag_orig | WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT); - if (ret == WT_DUPLICATE_KEY && F_ISSET(cursor, WT_CURSTD_OVERWRITE)) + if (ret == WT_DUPLICATE_KEY && F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { WT_ERR(__curtable_update(cursor)); - else { + + /* + * The cursor is no longer positioned. This isn't just cosmetic, + * without a reset, iteration on this cursor won't start at the + * beginning/end of the table. + */ + APPLY_CG(ctable, reset); + } else { WT_ERR(ret); for (i = 1; i < WT_COLGROUPS(ctable->table); i++, cp++) { @@ -601,14 +608,20 @@ err: CURSOR_UPDATE_API_END(session, ret); static int __curtable_remove(WT_CURSOR *cursor) { + WT_CURSOR *primary; WT_CURSOR_TABLE *ctable; WT_DECL_RET; WT_SESSION_IMPL *session; + bool positioned; ctable = (WT_CURSOR_TABLE *)cursor; JOINABLE_CURSOR_REMOVE_API_CALL(cursor, session, NULL); WT_ERR(__curtable_open_indices(ctable)); + /* Check if the cursor was positioned. 
*/ + primary = *ctable->cg_cursors; + positioned = F_ISSET(primary, WT_CURSTD_KEY_INT); + /* Find the old record so it can be removed from indices */ if (ctable->table->nindices > 0) { APPLY_CG(ctable, search); @@ -617,6 +630,19 @@ __curtable_remove(WT_CURSOR *cursor) } APPLY_CG(ctable, remove); + WT_ERR(ret); + + /* + * If the cursor was positioned, it stays positioned with a key but no + * no value, otherwise, there's no position, key or value. This isn't + * just cosmetic, without a reset, iteration on this cursor won't start + * at the beginning/end of the table. + */ + F_CLR(primary, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + if (positioned) + F_SET(primary, WT_CURSTD_KEY_INT); + else + APPLY_CG(ctable, reset); err: CURSOR_UPDATE_API_END(session, ret); return (ret); diff --git a/src/docs/cursor-ops.dox b/src/docs/cursor-ops.dox index b743d81db57..e479ff29191 100644 --- a/src/docs/cursor-ops.dox +++ b/src/docs/cursor-ops.dox @@ -145,9 +145,5 @@ that may not be modified or freed by the application. If a longer scope is required, the application must make a copy of the memory before the cursor is re-used, closed or reset. -The comments in this example code explain when the application can safely -modify memory passed to WT_CURSOR::set_key or WT_CURSOR::set_value: - -@snippet ex_scope.c cursor scope operation @m_endif */ diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index 2894db0c126..e5fce3d0d5d 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -2,28 +2,45 @@ @section version_292 Upgrading to Version 2.9.2
-
WiredTiger Utility now supports truncate
+ +
WiredTiger utility now supports truncate
-The WiredTiger Utility can now \c truncate an object. Removing all contents -from the specified object. +The WiredTiger utility \c wt can now \c truncate objects, removing all +contents from the specified object.
+
Handle list lock statistics
In the 2.9.1 release we added statistics tracking handle list lock timing. We have since switched that lock from a spin lock to a read-write lock, and consequently changed the statistics tracking lock-related wait time.
+
Forced and named checkpoint error conditions changed
There are new cases where checkpoints created with an explicit name or the "force" configuration option can return an EBUSY error. This can happen if the checkpoint overlaps with other schema operations, for example table create.
-
-@section version_291 Upgrading to Version 2.9.1 +
WT_CURSOR::remove may not return a positioned cursor
+
+The WT_CURSOR::remove method was previously documented to always return a +positioned cursor on success, which is not possible when \c overwrite=true +and the record does not exist. + +The documentation has been updated, and the method has been changed to +never return a cursor position unless called with an existing cursor +position. In other words, if the cursor is positioned and the +WT_CURSOR::remove method is called, the cursor will remain positioned; if the +cursor is not positioned and the WT_CURSOR::remove method is called, the +cursor will not be positioned on return. +
+
+@section version_291 Upgrading to Version 2.9.1
+
Changes to hazard pointer configuration
The \c hazard_max parameter to ::wiredtiger_open is now ignored. Memory is @@ -39,10 +56,11 @@ have added a new \c access_pattern_hint configuration option available for WT_SESSION::create that can be used to restore the old default by setting the value to "random".
-
+
@section version_290 Upgrading to Version 2.9.0
+
Changes to cursor behavior after WT_CURSOR::insert
After a successful call to WT_CURSOR::insert, unless a cursor has record diff --git a/src/include/api.h b/src/include/api.h index 2783d17f825..1fa777ed5cc 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -7,22 +7,21 @@ */ /* Standard entry points to the API: declares/initializes local variables. */ -#define API_SESSION_INIT(s, h, n, cur, dh) \ +#define API_SESSION_INIT(s, h, n, dh) \ WT_DATA_HANDLE *__olddh = (s)->dhandle; \ const char *__oldname = (s)->name; \ - (s)->cursor = (cur); \ (s)->dhandle = (dh); \ (s)->name = (s)->lastop = #h "." #n; \ -#define API_CALL_NOCONF(s, h, n, cur, dh) do { \ - API_SESSION_INIT(s, h, n, cur, dh); \ +#define API_CALL_NOCONF(s, h, n, dh) do { \ + API_SESSION_INIT(s, h, n, dh); \ WT_ERR(WT_SESSION_CHECK_PANIC(s)); \ __wt_verbose((s), WT_VERB_API, "CALL: " #h ":" #n) -#define API_CALL(s, h, n, cur, dh, config, cfg) do { \ +#define API_CALL(s, h, n, dh, config, cfg) do { \ const char *cfg[] = \ { WT_CONFIG_BASE(s, h##_##n), config, NULL }; \ - API_SESSION_INIT(s, h, n, cur, dh); \ + API_SESSION_INIT(s, h, n, dh); \ WT_ERR(WT_SESSION_CHECK_PANIC(s)); \ if ((config) != NULL) \ WT_ERR(__wt_config_check((s), \ @@ -42,17 +41,17 @@ } while (0) /* An API call wrapped in a transaction if necessary. */ -#define TXN_API_CALL(s, h, n, cur, bt, config, cfg) do { \ +#define TXN_API_CALL(s, h, n, bt, config, cfg) do { \ bool __autotxn = false; \ - API_CALL(s, h, n, bt, cur, config, cfg); \ + API_CALL(s, h, n, bt, config, cfg); \ __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\ if (__autotxn) \ F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT) /* An API call wrapped in a transaction if necessary. 
*/ -#define TXN_API_CALL_NOCONF(s, h, n, cur, bt) do { \ +#define TXN_API_CALL_NOCONF(s, h, n, bt) do { \ bool __autotxn = false; \ - API_CALL_NOCONF(s, h, n, cur, bt); \ + API_CALL_NOCONF(s, h, n, bt); \ __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\ if (__autotxn) \ F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT) @@ -98,24 +97,24 @@ #define CONNECTION_API_CALL(conn, s, n, config, cfg) \ s = (conn)->default_session; \ - API_CALL(s, WT_CONNECTION, n, NULL, NULL, config, cfg) + API_CALL(s, WT_CONNECTION, n, NULL, config, cfg) #define CONNECTION_API_CALL_NOCONF(conn, s, n) \ s = (conn)->default_session; \ - API_CALL_NOCONF(s, WT_CONNECTION, n, NULL, NULL) + API_CALL_NOCONF(s, WT_CONNECTION, n, NULL) #define SESSION_API_CALL(s, n, config, cfg) \ - API_CALL(s, WT_SESSION, n, NULL, NULL, config, cfg) + API_CALL(s, WT_SESSION, n, NULL, config, cfg) #define SESSION_API_CALL_NOCONF(s, n) \ - API_CALL_NOCONF(s, WT_SESSION, n, NULL, NULL) + API_CALL_NOCONF(s, WT_SESSION, n, NULL) #define SESSION_TXN_API_CALL(s, n, config, cfg) \ - TXN_API_CALL(s, WT_SESSION, n, NULL, NULL, config, cfg) + TXN_API_CALL(s, WT_SESSION, n, NULL, config, cfg) #define CURSOR_API_CALL(cur, s, n, bt) \ (s) = (WT_SESSION_IMPL *)(cur)->session; \ - API_CALL_NOCONF(s, WT_CURSOR, n, cur, \ + API_CALL_NOCONF(s, WT_CURSOR, n, \ ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle) #define JOINABLE_CURSOR_CALL_CHECK(cur) \ @@ -128,7 +127,7 @@ #define CURSOR_REMOVE_API_CALL(cur, s, bt) \ (s) = (WT_SESSION_IMPL *)(cur)->session; \ - TXN_API_CALL_NOCONF(s, WT_CURSOR, remove, cur, \ + TXN_API_CALL_NOCONF(s, WT_CURSOR, remove, \ ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); #define JOINABLE_CURSOR_REMOVE_API_CALL(cur, s, bt) \ @@ -137,7 +136,7 @@ #define CURSOR_UPDATE_API_CALL(cur, s, n, bt) \ (s) = (WT_SESSION_IMPL *)(cur)->session; \ - TXN_API_CALL_NOCONF(s, WT_CURSOR, n, cur, \ + TXN_API_CALL_NOCONF(s, WT_CURSOR, n, \ ((bt) == NULL) ? 
NULL : ((WT_BTREE *)(bt))->dhandle); \ if (F_ISSET(S2C(s), WT_CONN_IN_MEMORY) && \ !F_ISSET((WT_BTREE *)(bt), WT_BTREE_IGNORE_CACHE) && \ @@ -153,4 +152,4 @@ #define ASYNCOP_API_CALL(conn, s, n) \ s = (conn)->default_session; \ - API_CALL_NOCONF(s, asyncop, n, NULL, NULL) + API_CALL_NOCONF(s, asyncop, n, NULL) diff --git a/src/include/buf.i b/src/include/buf.i index ebbee6b4633..d192e292dcf 100644 --- a/src/include/buf.i +++ b/src/include/buf.i @@ -37,28 +37,30 @@ __wt_buf_extend(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) /* * __wt_buf_init -- - * Initialize a buffer at a specific size. + * Create an empty buffer at a specific size. */ static inline int __wt_buf_init(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) { + /* + * The buffer grow function does what we need, but anticipates data + * referenced by the buffer. Avoid any data copy by setting data to + * reference the buffer's allocated memory, and clearing it. + */ buf->data = buf->mem; - buf->size = 0; /* Clear existing data length */ - WT_RET(__wt_buf_grow(session, buf, size)); - - return (0); + buf->size = 0; + return (__wt_buf_grow(session, buf, size)); } /* * __wt_buf_initsize -- - * Initialize a buffer at a specific size, and set the data length. + * Create an empty buffer at a specific size, and set the data length. */ static inline int __wt_buf_initsize(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) { - buf->data = buf->mem; - buf->size = 0; /* Clear existing data length */ - WT_RET(__wt_buf_grow(session, buf, size)); + WT_RET(__wt_buf_init(session, buf, size)); + buf->size = size; /* Set the data length. */ return (0); @@ -72,14 +74,15 @@ static inline int __wt_buf_set( WT_SESSION_IMPL *session, WT_ITEM *buf, const void *data, size_t size) { - /* Ensure the buffer is large enough. */ - WT_RET(__wt_buf_initsize(session, buf, size)); - - /* Copy the data, allowing for overlapping strings. 
*/ - if (size != 0) - memmove(buf->mem, data, size); - - return (0); + /* + * The buffer grow function does what we need, but expects the data to + * be referenced by the buffer. If we're copying data from outside the + * buffer, set it up so it makes sense to the buffer grow function. (No + * test needed, this works if WT_ITEM.data is already set to "data".) + */ + buf->data = data; + buf->size = size; + return (__wt_buf_grow(session, buf, size)); } /* diff --git a/src/include/cursor.i b/src/include/cursor.i index c3fcef9a13d..9cb9f5e7189 100644 --- a/src/include/cursor.i +++ b/src/include/cursor.i @@ -93,17 +93,19 @@ __curfile_enter(WT_CURSOR_BTREE *cbt) } /* - * __curfile_leave -- - * Clear a file cursor's position. + * __cursor_reset -- + * Reset the cursor, it no longer holds any position. */ static inline int -__curfile_leave(WT_CURSOR_BTREE *cbt) +__cursor_reset(WT_CURSOR_BTREE *cbt) { WT_DECL_RET; WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)cbt->iface.session; + __cursor_pos_clear(cbt); + /* If the cursor was active, deactivate it. */ if (F_ISSET(cbt, WT_CBT_ACTIVE)) { if (!F_ISSET(cbt, WT_CBT_NO_TXN)) @@ -111,12 +113,15 @@ __curfile_leave(WT_CURSOR_BTREE *cbt) F_CLR(cbt, WT_CBT_ACTIVE); } + /* If we're not holding a cursor reference, we're done. */ + if (cbt->ref == NULL) + return (0); + /* * If we were scanning and saw a lot of deleted records on this page, * try to evict the page when we release it. 
*/ - if (cbt->ref != NULL && - cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD) + if (cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD) __wt_page_evict_soon(session, cbt->ref); cbt->page_deleted_count = 0; @@ -247,7 +252,7 @@ __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter) #ifdef HAVE_DIAGNOSTIC __wt_cursor_key_order_reset(cbt); #endif - WT_RET(__curfile_leave(cbt)); + WT_RET(__cursor_reset(cbt)); } /* @@ -271,24 +276,6 @@ __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter) return (0); } -/* - * __cursor_reset -- - * Reset the cursor. - */ -static inline int -__cursor_reset(WT_CURSOR_BTREE *cbt) -{ - WT_DECL_RET; - - /* - * The cursor is leaving the API, and no longer holds any position, - * generally called to clean up the cursor after an error. - */ - ret = __curfile_leave(cbt); - __cursor_pos_clear(cbt); - return (ret); -} - /* * __cursor_row_slot_return -- * Return a row-store leaf page slot's K/V pair. diff --git a/src/include/error.h b/src/include/error.h index bbb7f989332..c338acb370f 100644 --- a/src/include/error.h +++ b/src/include/error.h @@ -67,14 +67,16 @@ int __ret; \ if ((__ret = (a)) != 0 && \ (__ret == WT_PANIC || \ - ret == 0 || ret == WT_DUPLICATE_KEY || ret == WT_NOTFOUND)) \ + ret == 0 || ret == WT_DUPLICATE_KEY || \ + ret == WT_NOTFOUND || ret == WT_RESTART)) \ ret = __ret; \ } while (0) #define WT_TRET_ERROR_OK(a, e) do { \ int __ret; \ if ((__ret = (a)) != 0 && __ret != (e) && \ (__ret == WT_PANIC || \ - ret == 0 || ret == WT_DUPLICATE_KEY || ret == WT_NOTFOUND)) \ + ret == 0 || ret == WT_DUPLICATE_KEY || \ + ret == WT_NOTFOUND || ret == WT_RESTART)) \ ret = __ret; \ } while (0) #define WT_TRET_NOTFOUND_OK(a) WT_TRET_ERROR_OK(a, WT_NOTFOUND) diff --git a/src/include/extern.h b/src/include/extern.h index db718966426..c0aa21b7f4c 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -161,6 +161,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags #endif ) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern bool __wt_split_obsolete(WT_SESSION_IMPL *session, uint64_t split_gen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/misc.h b/src/include/misc.h index 66d43496e93..7aba397e173 100644 --- a/src/include/misc.h +++ b/src/include/misc.h @@ -140,6 +140,7 @@ #define F_CLR(p, mask) FLD_CLR((p)->flags, mask) #define F_ISSET(p, mask) FLD_ISSET((p)->flags, mask) +#define F_ISSET_ALL(p, mask) (FLD_MASK((p)->flags, mask) == (mask)) #define F_MASK(p, mask) FLD_MASK((p)->flags, mask) #define F_SET(p, mask) FLD_SET((p)->flags, mask) diff --git a/src/include/session.h b/src/include/session.h index f3092dc3c6c..dec97cff5d3 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -67,7 +67,6 @@ struct __wt_session_impl { TAILQ_HEAD(__dhandles, __wt_data_handle_cache) dhandles; time_t last_sweep; /* Last sweep for dead handles */ - WT_CURSOR *cursor; /* Current cursor */ /* Cursors closed with the session */ TAILQ_HEAD(__cursors, __wt_cursor) cursors; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index c148e759299..5dd9a720e31 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -427,7 +427,7 @@ struct __wt_cursor { * * 
@param cursor the cursor handle * @errors - * In particular, if \c overwrite is not configured and a record with + * In particular, if \c overwrite=false is configured and a record with * the specified key already exists, ::WT_DUPLICATE_KEY is returned. * Also, if \c in_memory is configured for the database and the insert * requires more than the configured cache size to complete, @@ -452,7 +452,9 @@ struct __wt_cursor { * * On success, the cursor ends positioned at the modified record; to * minimize cursor resources, the WT_CURSOR::reset method should be - * called as soon as the cursor no longer needs that position. + * called as soon as the cursor no longer needs that position. (The + * WT_CURSOR::insert method never keeps a cursor position and may be + * more efficient for that reason.) * * The maximum length of a single column stored in a table is not fixed * (as it partially depends on the underlying file configuration), but @@ -460,7 +462,7 @@ struct __wt_cursor { * * @param cursor the cursor handle * @errors - * In particular, if \c overwrite is not configured and no record with + * In particular, if \c overwrite=false is configured and no record with * the specified key exists, ::WT_NOTFOUND is returned. * Also, if \c in_memory is configured for the database and the insert * requires more than the configured cache size to complete, @@ -477,8 +479,18 @@ struct __wt_cursor { * * @snippet ex_all.c Remove a record * - * If the cursor was not configured with "overwrite=true", the key must - * be set and the key's record must exist; the record will be removed. + * If the cursor was configured with "overwrite=false" (not the + * default), the key must be set and the key's record must exist; the + * record will be removed. 
+ * + * Any cursor position does not change: if the cursor was positioned + * before the WT_CURSOR::remove call, the cursor remains positioned + * at the removed record; to minimize cursor resources, the + * WT_CURSOR::reset method should be called as soon as the cursor no + * longer needs that position. If the cursor was not positioned before + * the WT_CURSOR::remove call, the cursor ends with no position, and a + * subsequent call to the WT_CURSOR::next (WT_CURSOR::prev) method will + * iterate from the beginning (end) of the table. * * @snippet ex_all.c Remove a record and fail if DNE * @@ -486,14 +498,10 @@ struct __wt_cursor { * (that is, a store with an 'r' type key and 't' type value) is * identical to setting the record's value to 0. * - * On success, the cursor ends positioned at the removed record; to - * minimize cursor resources, the WT_CURSOR::reset method should be - * called as soon as the cursor no longer needs that position. - * * @param cursor the cursor handle * @errors - * In particular, if \c overwrite is not configured and no record with - * the specified key exists, ::WT_NOTFOUND is returned. + * In particular, if \c overwrite=false is configured and no record + * with the specified key exists, ::WT_NOTFOUND is returned. */ int __F(remove)(WT_CURSOR *cursor); /*! 
@} */ diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 116740f8f0c..77fa96ebdfd 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -1565,12 +1565,23 @@ __clsm_update(WT_CURSOR *cursor) WT_CURSOR_NEEDVALUE(cursor); WT_ERR(__clsm_enter(clsm, false, true)); - if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) || - (ret = __clsm_lookup(clsm, &value)) == 0) { - WT_ERR(__clsm_deleted_encode( - session, &cursor->value, &value, &buf)); - ret = __clsm_put(session, clsm, &cursor->key, &value, true); - } + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) + WT_ERR(__clsm_lookup(clsm, &value)); + WT_ERR(__clsm_deleted_encode(session, &cursor->value, &value, &buf)); + WT_ERR(__clsm_put(session, clsm, &cursor->key, &value, true)); + + /* + * Set the cursor to reference the internal key/value of the positioned + * cursor. + */ + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + WT_ITEM_SET(cursor->key, clsm->current->key); + WT_ITEM_SET(cursor->value, clsm->current->value); + WT_ASSERT(session, + F_MASK(clsm->current, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); + WT_ASSERT(session, + F_MASK(clsm->current, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); err: __wt_scr_free(session, &buf); __clsm_leave(clsm); @@ -1589,9 +1600,13 @@ __clsm_remove(WT_CURSOR *cursor) WT_DECL_RET; WT_ITEM value; WT_SESSION_IMPL *session; + bool positioned; clsm = (WT_CURSOR_LSM *)cursor; + /* Check if the cursor is positioned. 
*/ + positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT); + CURSOR_REMOVE_API_CALL(cursor, session, NULL); WT_CURSOR_NEEDKEY(cursor); WT_CURSOR_NOVALUE(cursor); @@ -1600,9 +1615,22 @@ __clsm_remove(WT_CURSOR *cursor) if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) || (ret = __clsm_lookup(clsm, &value)) == 0) ret = __clsm_put( - session, clsm, &cursor->key, &__tombstone, true); + session, clsm, &cursor->key, &__tombstone, positioned); err: __clsm_leave(clsm); + + /* + * If the cursor was positioned, it stays positioned with a key but no + * value, otherwise, there's no position, key or value. This isn't + * just cosmetic, without a reset, iteration on this cursor won't start + * at the beginning/end of the table. + */ + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + if (positioned) + F_SET(cursor, WT_CURSTD_KEY_INT); + else + WT_TRET(cursor->reset(cursor)); + CURSOR_UPDATE_API_END(session, ret); return (ret); } diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 748f4aa2473..ec150f39fc5 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -274,7 +274,6 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) btree = S2BT(session); /* Find out if we have to force a checkpoint. 
*/ - force = false; WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval)); force = cval.val != 0; if (!force) { diff --git a/test/csuite/Makefile.am b/test/csuite/Makefile.am index e2b72532703..10ab890f2f5 100644 --- a/test/csuite/Makefile.am +++ b/test/csuite/Makefile.am @@ -4,8 +4,13 @@ LDADD = $(top_builddir)/test/utility/libtest_util.la \ $(top_builddir)/libwiredtiger.la AM_LDFLAGS = -static +noinst_PROGRAMS= + +test_scope_SOURCES = scope/main.c +noinst_PROGRAMS += test_scope + test_wt1965_col_efficiency_SOURCES = wt1965_col_efficiency/main.c -noinst_PROGRAMS = test_wt1965_col_efficiency +noinst_PROGRAMS += test_wt1965_col_efficiency test_wt2403_lsm_workload_SOURCES = wt2403_lsm_workload/main.c noinst_PROGRAMS += test_wt2403_lsm_workload diff --git a/test/csuite/scope/main.c b/test/csuite/scope/main.c new file mode 100644 index 00000000000..15dabd97c40 --- /dev/null +++ b/test/csuite/scope/main.c @@ -0,0 +1,288 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +#define KEY "key" +#define VALUE "value" + +static int ignore_errors; + +static int +handle_error(WT_EVENT_HANDLER *handler, + WT_SESSION *session, int error, const char *message) +{ + (void)(handler); + + /* Skip the error messages we're expecting to see. */ + if (ignore_errors > 0 && + (strstr(message, "requires key be set") != NULL || + strstr(message, "requires value be set") != NULL)) { + --ignore_errors; + return (0); + } + + (void)fprintf(stderr, "%s: %s\n", + message, session->strerror(session, error)); + return (0); +} + +static WT_EVENT_HANDLER event_handler = { + handle_error, + NULL, + NULL, + NULL +}; + +static void +cursor_scope_ops(WT_SESSION *session, const char *uri) +{ + struct { + const char *op; + enum { INSERT, SEARCH, SEARCH_NEAR, + REMOVE, REMOVE_POS, RESERVE, UPDATE } func; + const char *config; + } *op, ops[] = { + /* + * The ops order is fixed and shouldn't change, that is, insert + * has to happen first so search, update and remove operations + * are possible, and remove has to be last. + */ + { "insert", INSERT, NULL, }, + { "search", SEARCH, NULL, }, + { "search", SEARCH_NEAR, NULL, }, +#if 0 + { "reserve", RESERVE, NULL, }, +#endif + { "update", UPDATE, NULL, }, + { "remove", REMOVE, NULL, }, + { "remove", REMOVE_POS, NULL, }, + { NULL, INSERT, NULL } + }; + WT_CURSOR *cursor; + uint64_t keyr; + const char *key, *value; + char keybuf[100], valuebuf[100]; + int exact; + bool recno; + + /* Reserve requires a running transaction. 
*/ + testutil_check(session->begin_transaction(session, NULL)); + + cursor = NULL; + for (op = ops; op->op != NULL; op++) { + key = value = NULL; + + /* Open a cursor. */ + if (cursor != NULL) + testutil_check(cursor->close(cursor)); + testutil_check(session->open_cursor( + session, uri, NULL, op->config, &cursor)); + recno = strcmp(cursor->key_format, "r") == 0; + + /* + * Set up application buffers so we can detect overwrites + * or failure to copy application information into library + * memory. + */ + if (recno) + cursor->set_key(cursor, (uint64_t)1); + else { + strcpy(keybuf, KEY); + cursor->set_key(cursor, keybuf); + } + strcpy(valuebuf, VALUE); + cursor->set_value(cursor, valuebuf); + + /* + * The application must keep key and value memory valid until + * the next operation that positions the cursor, modifies the + * data, or resets or closes the cursor. + * + * Modifying either the key or value buffers is not permitted. + */ + switch (op->func) { + case INSERT: + testutil_check(cursor->insert(cursor)); + break; + case SEARCH: + testutil_check(cursor->search(cursor)); + break; + case SEARCH_NEAR: + testutil_check(cursor->search_near(cursor, &exact)); + break; + case REMOVE_POS: + /* + * Remove has two modes, one where the remove is based + * on a cursor position, the other where it's based on + * a set key. The results are different, so test them + * separately. + */ + testutil_check(cursor->search(cursor)); + /* FALLTHROUGH */ + case REMOVE: + testutil_check(cursor->remove(cursor)); + break; + case RESERVE: +#if 0 + testutil_check(cursor->reserve(cursor)); +#endif + break; + case UPDATE: + testutil_check(cursor->update(cursor)); + break; + } + + /* + * The cursor should no longer reference application memory, + * and application buffers can be safely overwritten. + */ + memset(keybuf, 'K', sizeof(keybuf)); + memset(valuebuf, 'V', sizeof(valuebuf)); + + /* + * Check that get_key/get_value behave as expected after the + * operation. 
+ */ + switch (op->func) { + case INSERT: + case REMOVE: + /* + * Insert and remove configured with a search key do + * not position the cursor and have no key or value. + * + * There should be two error messages, ignore them. + */ + ignore_errors = 2; + if (recno) + testutil_assert( + cursor->get_key(cursor, &keyr) != 0); + else + testutil_assert( + cursor->get_key(cursor, &key) != 0); + testutil_assert(cursor->get_value(cursor, &value) != 0); + testutil_assert(ignore_errors == 0); + break; + case REMOVE_POS: + /* + * Remove configured with a cursor position has a key, + * but no value. + * + * There should be one error message, ignore it. + */ + if (recno) { + testutil_assert( + cursor->get_key(cursor, &keyr) == 0); + testutil_assert(keyr == 1); + } else { + testutil_assert( + cursor->get_key(cursor, &key) == 0); + testutil_assert(key != keybuf); + testutil_assert(strcmp(key, KEY) == 0); + } + ignore_errors = 1; + testutil_assert(cursor->get_value(cursor, &value) != 0); + testutil_assert(ignore_errors == 0); + break; + case RESERVE: + case SEARCH: + case SEARCH_NEAR: + case UPDATE: + /* + * Reserve, search, search-near and update position the + * cursor and have both a key and value. + * + * Any key/value should not reference application + * memory. + */ + if (recno) { + testutil_assert( + cursor->get_key(cursor, &keyr) == 0); + testutil_assert(keyr == 1); + } else { + testutil_assert( + cursor->get_key(cursor, &key) == 0); + testutil_assert(key != keybuf); + testutil_assert(strcmp(key, KEY) == 0); + } + testutil_assert(cursor->get_value(cursor, &value) == 0); + testutil_assert(value != valuebuf); + testutil_assert(strcmp(value, VALUE) == 0); + break; + } + + /* + * We have more than one remove operation, add the key back + * in. 
+ */ + if (op->func == REMOVE || op->func == REMOVE_POS) { + if (recno) + cursor->set_key(cursor, (uint64_t)1); + else { + cursor->set_key(cursor, KEY); + } + cursor->set_value(cursor, VALUE); + testutil_check(cursor->insert(cursor)); + } + } +} + +static void +run(WT_CONNECTION *conn, const char *uri, const char *config) +{ + WT_SESSION *session; + + testutil_check(conn->open_session(conn, NULL, NULL, &session)); + testutil_check(session->create(session, uri, config)); + cursor_scope_ops(session, uri); + testutil_check(session->close(session, NULL)); +} + +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + testutil_check(testutil_parse_opts(argc, argv, opts)); + testutil_make_work_dir(opts->home); + + testutil_check( + wiredtiger_open(opts->home, &event_handler, "create", &opts->conn)); + + run(opts->conn, "file:file.SS", "key_format=S,value_format=S"); + run(opts->conn, "file:file.rS", "key_format=r,value_format=S"); + run(opts->conn, "lsm:lsm.SS", "key_format=S,value_format=S"); + run(opts->conn, "lsm:lsm.rS", "key_format=r,value_format=S"); + run(opts->conn, "table:table.SS", "key_format=S,value_format=S"); + run(opts->conn, "table:table.rS", "key_format=r,value_format=S"); + + testutil_cleanup(opts); + + return (EXIT_SUCCESS); +} diff --git a/test/suite/test_cursor10.py b/test/suite/test_cursor10.py index b3cffeab4e9..6cabfde9f1f 100644 --- a/test/suite/test_cursor10.py +++ b/test/suite/test_cursor10.py @@ -31,11 +31,11 @@ from wtscenario import make_scenarios # test_cursor10.py # Cursors with projections. 
-class test_cursor04(wttest.WiredTigerTestCase): +class test_cursor10(wttest.WiredTigerTestCase): """ Test cursor search and search_near """ - table_name1 = 'test_cursor04' + table_name1 = 'test_cursor10' nentries = 20 scenarios = make_scenarios([ diff --git a/test/suite/test_cursor11.py b/test/suite/test_cursor11.py new file mode 100644 index 00000000000..e159ec499e6 --- /dev/null +++ b/test/suite/test_cursor11.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+ +import wiredtiger, wttest +from wtdataset import SimpleDataSet, SimpleIndexDataSet +from wtdataset import SimpleLSMDataSet, ComplexDataSet, ComplexLSMDataSet +from wtscenario import make_scenarios + +# test_cursor11.py +# WT_CURSOR position tests: remove (if not already positioned), and insert +# leave the cursor without position or information. +class test_cursor11(wttest.WiredTigerTestCase): + + keyfmt = [ + ('integer', dict(keyfmt='i')), + ('recno', dict(keyfmt='r')), + ('string', dict(keyfmt='S')), + ] + types = [ + ('file', dict(uri='file', ds=SimpleDataSet)), + ('lsm', dict(uri='lsm', ds=SimpleDataSet)), + ('table-complex', dict(uri='table', ds=ComplexDataSet)), + ('table-complex-lsm', dict(uri='table', ds=ComplexLSMDataSet)), + ('table-index', dict(uri='table', ds=SimpleIndexDataSet)), + ('table-simple', dict(uri='table', ds=SimpleDataSet)), + ('table-simple-lsm', dict(uri='table', ds=SimpleLSMDataSet)), + ] + scenarios = make_scenarios(types, keyfmt) + + def skip(self): + return self.keyfmt == 'r' and \ + (self.ds.is_lsm() or self.uri == 'lsm') + + # Do a remove using the cursor after setting a position, and confirm + # the key and position remain set but no value. + def test_cursor_remove_with_position(self): + if self.skip(): + return + + # Build an object. + uri = self.uri + ':test_cursor11' + ds = self.ds(self, uri, 50, key_format=self.keyfmt) + ds.populate() + s = self.conn.open_session() + c = s.open_cursor(uri, None) + + c.set_key(ds.key(25)) + self.assertEquals(c.search(), 0) + self.assertEquals(c.next(), 0) + self.assertEquals(c.get_key(), ds.key(26)) + c.remove() + self.assertEquals(c.get_key(), ds.key(26)) + msg = '/requires value be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_value, msg) + self.assertEquals(c.next(), 0) + self.assertEquals(c.get_key(), ds.key(27)) + + # Do a remove using the cursor without setting a position, and confirm + # no key, value or position remains. 
+ def test_cursor_remove_without_position(self): + if self.skip(): + return + + # Build an object. + uri = self.uri + ':test_cursor11' + ds = self.ds(self, uri, 50, key_format=self.keyfmt) + ds.populate() + s = self.conn.open_session() + c = s.open_cursor(uri, None) + + c.set_key(ds.key(25)) + c.remove() + msg = '/requires key be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_key, msg) + msg = '/requires value be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_value, msg) + self.assertEquals(c.next(), 0) + self.assertEquals(c.get_key(), ds.key(1)) + + # Do a remove using the key after also setting a position, and confirm + # no key, value or position remains. + def test_cursor_remove_with_key_and_position(self): + if self.skip(): + return + + # Build an object. + uri = self.uri + ':test_cursor11' + ds = self.ds(self, uri, 50, key_format=self.keyfmt) + ds.populate() + s = self.conn.open_session() + c = s.open_cursor(uri, None) + + c.set_key(ds.key(25)) + self.assertEquals(c.search(), 0) + c.set_key(ds.key(25)) + c.remove() + msg = '/requires key be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_key, msg) + msg = '/requires value be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_value, msg) + self.assertEquals(c.next(), 0) + self.assertEquals(c.get_key(), ds.key(1)) + + # Do an insert and confirm no key, value or position remains. + def test_cursor_insert(self): + if self.skip(): + return + + # Build an object. 
+ uri = self.uri + ':test_cursor11' + ds = self.ds(self, uri, 50, key_format=self.keyfmt) + ds.populate() + s = self.conn.open_session() + c = s.open_cursor(uri, None) + + c.set_key(ds.key(25)) + c.set_value(ds.value(300)) + c.insert() + msg = '/requires key be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_key, msg) + msg = '/requires value be set/' + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, c.get_value, msg) + self.assertEquals(c.next(), 0) + self.assertEquals(c.get_key(), ds.key(1)) + +if __name__ == '__main__': + wttest.run() -- cgit v1.2.1 From b77f9cc3b7fe7c15445c13df9bef74f1dd39b991 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 16 Mar 2017 15:03:31 +1100 Subject: WT-3218 Avoid adding duplicate handles to connection dhandle list (#3331) * Recheck for existence after acquiring write lock when creating a new dhandle. * Add a wtperf workload that reproduced the original failure. --- bench/wtperf/runners/many-table-stress.wtperf | 19 +++++++++++++++++++ src/conn/conn_dhandle.c | 8 ++++++++ 2 files changed, 27 insertions(+) create mode 100644 bench/wtperf/runners/many-table-stress.wtperf diff --git a/bench/wtperf/runners/many-table-stress.wtperf b/bench/wtperf/runners/many-table-stress.wtperf new file mode 100644 index 00000000000..51d0bb0dd9d --- /dev/null +++ b/bench/wtperf/runners/many-table-stress.wtperf @@ -0,0 +1,19 @@ +# Create a set of tables with uneven distribution of data +conn_config="cache_size=1G,eviction=(threads_max=8),file_manager=(close_idle_time=100000),checkpoint=(wait=20,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000" +table_config="type=file" +table_count=5000 +icount=0 +random_range=1000000000 +pareto=10 +range_partition=true +report_interval=5 + +run_ops=1000000 +populate_threads=0 +icount=0 +threads=((count=60,inserts=1)) + +# Warn if a latency over 1 second is seen +max_latency=1000 +sample_interval=5 +sample_rate=1 diff --git a/src/conn/conn_dhandle.c 
b/src/conn/conn_dhandle.c index 6c8d66d63f8..c5480897494 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -42,6 +42,14 @@ __wt_conn_dhandle_alloc( WT_DECL_RET; uint64_t bucket; + /* + * Ensure no one beat us to creating the handle now that we hold the + * write lock. + */ + if ((ret = + __wt_conn_dhandle_find(session, uri, checkpoint)) != WT_NOTFOUND) + return (ret); + WT_RET(__wt_calloc_one(session, &dhandle)); __wt_rwlock_init(session, &dhandle->rwlock); -- cgit v1.2.1 From 51d22616094e0a0d34997d26aec925adf949fbdf Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Thu, 16 Mar 2017 16:26:49 +1100 Subject: WT-3206 Fix a race allocating split generations. (#3332) We use split generations to detect when readers may be looking at structures that are replaced by a split. For correctness, we should only increment the global split generation *after* a split becomes public. Only then can we safely check that no thread is still reading with the old generation. Previously, a split could increment the global split generation, then a thread could start reading with the new split generation but see the old index structure. This issue was introduced by WT 3088, where we wanted a way to ensure that newly-allocated pages don't split until it is safe. That is solved here by having the split code pin a split generation in the ordinary way (without allocating a new one) for the duration that splits of new pages need to be prevented. 
--- src/btree/bt_split.c | 71 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 45550ff627f..6b2100ec7e3 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -187,7 +187,7 @@ __split_safe_free(WT_SESSION_IMPL *session, exclusive = true; if (exclusive) { - __wt_free(session, p); + __wt_overwrite_and_free_len(session, p, s); return (0); } @@ -640,12 +640,12 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) /* Start making real changes to the tree, errors are fatal. */ complete = WT_ERR_PANIC; - /* Get a generation for this split, mark the root page. */ - split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); - root->pg_intl_split_gen = split_gen; - - /* Prepare the WT_REFs for the move. */ - __split_ref_prepare(session, alloc_index, split_gen, false); + /* + * Prepare the WT_REFs for the move: this requires a stable split + * generation to block splits in newly created pages, so get one. + */ + WT_ENTER_PAGE_INDEX(session); + __split_ref_prepare(session, alloc_index, session->split_gen, false); /* * Confirm the root page's index hasn't moved, then update it, which @@ -655,6 +655,16 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) WT_INTL_INDEX_SET(root, alloc_index); alloc_index = NULL; + WT_LEAVE_PAGE_INDEX(session); + + /* + * Get a generation for this split, mark the root page. This must be + * after the new index is swapped into place in order to know that no + * readers are looking at the old index. + */ + split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); + root->pg_intl_split_gen = split_gen; + #ifdef HAVE_DIAGNOSTIC WT_WITH_PAGE_INDEX(session, ret = __split_verify_root(session, root)); @@ -825,10 +835,6 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, /* Start making real changes to the tree, errors are fatal. 
*/ complete = WT_ERR_PANIC; - /* Get a generation for this split, mark the parent page. */ - split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); - parent->pg_intl_split_gen = split_gen; - /* * Confirm the parent page's index hasn't moved then update it, which * makes the split visible to threads descending the tree. @@ -837,6 +843,14 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, WT_INTL_INDEX_SET(parent, alloc_index); alloc_index = NULL; + /* + * Get a generation for this split, mark the page. This must be after + * the new index is swapped into place in order to know that no readers + * are looking at the old index. + */ + split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); + parent->pg_intl_split_gen = split_gen; + /* * If discarding the page's original WT_REF field, reset it to split. * Threads cursoring through the tree were blocked because that WT_REF @@ -1154,23 +1168,34 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) /* Start making real changes to the tree, errors are fatal. */ complete = WT_ERR_PANIC; - /* Get a generation for this split, mark the page. */ - split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); - page->pg_intl_split_gen = split_gen; - - /* Prepare the WT_REFs for the move. */ - __split_ref_prepare(session, alloc_index, split_gen, true); + /* + * Prepare the WT_REFs for the move: this requires a stable split + * generation to block splits in newly created pages, so get one. + */ + WT_ENTER_PAGE_INDEX(session); + __split_ref_prepare(session, alloc_index, session->split_gen, true); /* Split into the parent. 
*/ - WT_ERR(__split_parent(session, page_ref, alloc_index->index, - alloc_index->entries, parent_incr, false, false)); + if ((ret = __split_parent(session, page_ref, alloc_index->index, + alloc_index->entries, parent_incr, false, false)) == 0) { + /* + * Confirm the page's index hasn't moved, then update it, which + * makes the split visible to threads descending the tree. + */ + WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(page) == pindex); + WT_INTL_INDEX_SET(page, replace_index); + } + + WT_LEAVE_PAGE_INDEX(session); + WT_ERR(ret); /* - * Confirm the page's index hasn't moved, then update it, which makes - * the split visible to threads descending the tree. + * Get a generation for this split, mark the parent page. This must be + * after the new index is swapped into place in order to know that no + * readers are looking at the old index. */ - WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(page) == pindex); - WT_INTL_INDEX_SET(page, replace_index); + split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); + page->pg_intl_split_gen = split_gen; #ifdef HAVE_DIAGNOSTIC WT_WITH_PAGE_INDEX(session, -- cgit v1.2.1 From 92327e8fd19c92ca5687f6e093553fb96c742688 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 16 Mar 2017 15:03:31 +1100 Subject: WT-3218 Avoid adding duplicate handles to connection dhandle list (#3331) * Recheck for existence after acquiring write lock when creating a new dhandle. * Add a wtperf workload that reproduced the original failure. 
(cherry picked from commit b77f9cc3b7fe7c15445c13df9bef74f1dd39b991) --- bench/wtperf/runners/many-table-stress.wtperf | 19 +++++++++++++++++++ src/conn/conn_dhandle.c | 8 ++++++++ 2 files changed, 27 insertions(+) create mode 100644 bench/wtperf/runners/many-table-stress.wtperf diff --git a/bench/wtperf/runners/many-table-stress.wtperf b/bench/wtperf/runners/many-table-stress.wtperf new file mode 100644 index 00000000000..51d0bb0dd9d --- /dev/null +++ b/bench/wtperf/runners/many-table-stress.wtperf @@ -0,0 +1,19 @@ +# Create a set of tables with uneven distribution of data +conn_config="cache_size=1G,eviction=(threads_max=8),file_manager=(close_idle_time=100000),checkpoint=(wait=20,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000" +table_config="type=file" +table_count=5000 +icount=0 +random_range=1000000000 +pareto=10 +range_partition=true +report_interval=5 + +run_ops=1000000 +populate_threads=0 +icount=0 +threads=((count=60,inserts=1)) + +# Warn if a latency over 1 second is seen +max_latency=1000 +sample_interval=5 +sample_rate=1 diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 866b8633f71..99213c5b557 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -38,6 +38,14 @@ __wt_conn_dhandle_alloc( WT_DECL_RET; uint64_t bucket; + /* + * Ensure no one beat us to creating the handle now that we hold the + * write lock. + */ + if ((ret = + __wt_conn_dhandle_find(session, uri, checkpoint)) != WT_NOTFOUND) + return (ret); + WT_RET(__wt_calloc_one(session, &dhandle)); __wt_rwlock_init(session, &dhandle->rwlock); -- cgit v1.2.1 From cc2f15f595b16479affd73791c207da334453bcc Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Thu, 16 Mar 2017 16:26:49 +1100 Subject: WT-3206 Fix a race allocating split generations. (#3332) We use split generations to detect when readers may be looking at structures that are replaced by a split. 
For correctness, we should only increment the global split generation *after* a split becomes public. Only then can we safely check that no thread is still reading with the old generation. Previously, a split could increment the global split generation, then a thread could start reading with the new split generation but see the old index structure. This issue was introduced by WT 3088, where we wanted a way to ensure that newly-allocated pages don't split until it is safe. That is solved here by having the split code pin a split generation in the ordinary way (without allocating a new one) for the duration that splits of new pages need to be prevented. (cherry picked from commit 51d22616094e0a0d34997d26aec925adf949fbdf) --- src/btree/bt_split.c | 71 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 45550ff627f..6b2100ec7e3 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -187,7 +187,7 @@ __split_safe_free(WT_SESSION_IMPL *session, exclusive = true; if (exclusive) { - __wt_free(session, p); + __wt_overwrite_and_free_len(session, p, s); return (0); } @@ -640,12 +640,12 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) /* Start making real changes to the tree, errors are fatal. */ complete = WT_ERR_PANIC; - /* Get a generation for this split, mark the root page. */ - split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); - root->pg_intl_split_gen = split_gen; - - /* Prepare the WT_REFs for the move. */ - __split_ref_prepare(session, alloc_index, split_gen, false); + /* + * Prepare the WT_REFs for the move: this requires a stable split + * generation to block splits in newly created pages, so get one. 
+ */ + WT_ENTER_PAGE_INDEX(session); + __split_ref_prepare(session, alloc_index, session->split_gen, false); /* * Confirm the root page's index hasn't moved, then update it, which @@ -655,6 +655,16 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) WT_INTL_INDEX_SET(root, alloc_index); alloc_index = NULL; + WT_LEAVE_PAGE_INDEX(session); + + /* + * Get a generation for this split, mark the root page. This must be + * after the new index is swapped into place in order to know that no + * readers are looking at the old index. + */ + split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); + root->pg_intl_split_gen = split_gen; + #ifdef HAVE_DIAGNOSTIC WT_WITH_PAGE_INDEX(session, ret = __split_verify_root(session, root)); @@ -825,10 +835,6 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, /* Start making real changes to the tree, errors are fatal. */ complete = WT_ERR_PANIC; - /* Get a generation for this split, mark the parent page. */ - split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); - parent->pg_intl_split_gen = split_gen; - /* * Confirm the parent page's index hasn't moved then update it, which * makes the split visible to threads descending the tree. @@ -837,6 +843,14 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, WT_INTL_INDEX_SET(parent, alloc_index); alloc_index = NULL; + /* + * Get a generation for this split, mark the page. This must be after + * the new index is swapped into place in order to know that no readers + * are looking at the old index. + */ + split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); + parent->pg_intl_split_gen = split_gen; + /* * If discarding the page's original WT_REF field, reset it to split. * Threads cursoring through the tree were blocked because that WT_REF @@ -1154,23 +1168,34 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) /* Start making real changes to the tree, errors are fatal. 
*/ complete = WT_ERR_PANIC; - /* Get a generation for this split, mark the page. */ - split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); - page->pg_intl_split_gen = split_gen; - - /* Prepare the WT_REFs for the move. */ - __split_ref_prepare(session, alloc_index, split_gen, true); + /* + * Prepare the WT_REFs for the move: this requires a stable split + * generation to block splits in newly created pages, so get one. + */ + WT_ENTER_PAGE_INDEX(session); + __split_ref_prepare(session, alloc_index, session->split_gen, true); /* Split into the parent. */ - WT_ERR(__split_parent(session, page_ref, alloc_index->index, - alloc_index->entries, parent_incr, false, false)); + if ((ret = __split_parent(session, page_ref, alloc_index->index, + alloc_index->entries, parent_incr, false, false)) == 0) { + /* + * Confirm the page's index hasn't moved, then update it, which + * makes the split visible to threads descending the tree. + */ + WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(page) == pindex); + WT_INTL_INDEX_SET(page, replace_index); + } + + WT_LEAVE_PAGE_INDEX(session); + WT_ERR(ret); /* - * Confirm the page's index hasn't moved, then update it, which makes - * the split visible to threads descending the tree. + * Get a generation for this split, mark the parent page. This must be + * after the new index is swapped into place in order to know that no + * readers are looking at the old index. 
*/ - WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(page) == pindex); - WT_INTL_INDEX_SET(page, replace_index); + split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); + page->pg_intl_split_gen = split_gen; #ifdef HAVE_DIAGNOSTIC WT_WITH_PAGE_INDEX(session, -- cgit v1.2.1 From 6203106c56504f194bab7093b28c45ae7beb9cac Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 16 Mar 2017 07:49:08 -0400 Subject: WT-3204 eviction changes cost LSM performance (#3325) * WT-3204 eviction changes cost LSM performance Modify LSM's primary chunk switching to match the new btree eviction semantics on object creation. We now create objects with eviction turned off, LSM should no longer have to turn eviction off when configuring the primary chunk. LSM previously set WT_BTREE.bulk_load_ok to false to ensure an insert into the tree wouldn't turn eviction on. That problem remains, but there's a race in the implementation if multiple threads are inserting at the same time (where a thread modifies WT_BTREE.bulk_load_ok and goes to sleep before configuring eviction, and another thread does an insert and turns off eviction), and there's a further race between threads doing F_ISSET/F_SET tests. Change the WT_BTREE_LSM_PRIMARY flag into a WT_BTREE.lsm_primary variable so there's no F_ISSET/F_SET race. Remove the test/set of bulk-load_ok, instead, test the lsm_primary value in the btree code before turning eviction off. When checkpointing an LSM chunk, move the code that turns off the chunk's primary flag in the chunk inside the single-threaded part of the function to ensure we don't race with other threads doing checkpoints. That makes the code to fix up the accounting single-threaded and safe. Simplify the LSM checkpoint code to call __wt_checkpoint directly, and use the same handle for turning off the chunk's primary flag as we use for the checkpoint. * Force a primary switch in LSM after an exclusive-handle operation has come through. 
Otherwise it's possible to attempt to use a file as the primary chunk without disabling eviction. * spelling * WT_BTREE.bulk_load_ok isn't a boolean, don't use true/false comparisons. * Only check for an empty tree the first time an LSM chunk is opened. The goal here is to make sure that LSM primary chunks start empty. Otherwise, we can't load into a skiplist in memory as required by LSM. If an operation such as verify closes a btree in order to check the on-disk state, the next time it is opened we have to check whether it is empty. It is safe to do this check without locking: what matters is that we always do the `lsm_primary` check before any update operation that would turn off `btree->bulk_load_ok`. * Rename WT_BTREE.bulk_load_ok to be WT_BTREE.original, it's used by LSM. * Fix a comment. --- src/btree/bt_cursor.c | 24 ++++++---- src/btree/bt_handle.c | 4 +- src/include/btree.h | 9 ++-- src/include/btree.i | 60 ++----------------------- src/lsm/lsm_cursor.c | 31 ++++++++----- src/lsm/lsm_work_unit.c | 109 ++++++++++++++++++++++++++++++++++------------ src/reconcile/rec_write.c | 3 +- src/txn/txn_ckpt.c | 2 +- 8 files changed, 130 insertions(+), 112 deletions(-) diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 3ae6e022906..d6dc0991d3f 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -126,15 +126,23 @@ static inline void __cursor_disable_bulk(WT_SESSION_IMPL *session, WT_BTREE *btree) { /* - * Once a tree is no longer empty, eviction should pay attention to it, - * and it's no longer possible to bulk-load into it. - * - * We use a compare-and-swap here to avoid races among the first - * inserts into a tree. Eviction is disabled when an empty tree is - * opened, it must only be enabled once. + * Once a tree (other than the LSM primary) is no longer empty, eviction + * should pay attention to it, and it's no longer possible to bulk-load + * into it. 
+ */ + if (!btree->original) + return; + if (btree->lsm_primary) { + btree->original = 0; /* Make the next test faster. */ + return; + } + + /* + * We use a compare-and-swap here to avoid races among the first inserts + * into a tree. Eviction is disabled when an empty tree is opened, and + * it must only be enabled once. */ - if (btree->bulk_load_ok && - __wt_atomic_cas8(&btree->bulk_load_ok, 1, 0)) + if (__wt_atomic_cas8(&btree->original, 1, 0)) __wt_evict_file_exclusive_off(session); } diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index ff199eb1e0e..f2bffee06da 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -188,7 +188,7 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) * Special operations don't enable eviction. (The underlying commands * may turn on eviction, but it's their decision.) */ - if (btree->bulk_load_ok || + if (btree->original || F_ISSET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_REBALANCE | WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) WT_ERR(__wt_evict_file_exclusive_on(session)); @@ -562,7 +562,7 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, bool creation) * tree. */ if (creation) - btree->bulk_load_ok = 1; + btree->original = 1; /* * A note about empty trees: the initial tree is a single root page. 
diff --git a/src/include/btree.h b/src/include/btree.h index 857dc6694c5..15a68474fdf 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -118,11 +118,13 @@ struct __wt_btree { uint64_t last_recno; /* Column-store last record number */ - WT_REF root; /* Root page reference */ - bool modified; /* If the tree ever modified */ - uint8_t bulk_load_ok; /* Bulk-load is a possibility + WT_REF root; /* Root page reference */ + bool modified; /* If the tree ever modified */ + uint8_t original; /* Newly created: bulk-load possible (want a bool but needs atomic cas) */ + bool lsm_primary; /* Handle is/was the LSM primary */ + WT_BM *bm; /* Block manager reference */ u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */ @@ -160,7 +162,6 @@ struct __wt_btree { #define WT_BTREE_IGNORE_CACHE 0x000400 /* Cache-resident object */ #define WT_BTREE_IN_MEMORY 0x000800 /* Cache-resident object */ #define WT_BTREE_LOOKASIDE 0x001000 /* Look-aside table */ -#define WT_BTREE_LSM_PRIMARY 0x002000 /* Handle is current LSM primary */ #define WT_BTREE_NO_CHECKPOINT 0x004000 /* Disable checkpoints */ #define WT_BTREE_NO_LOGGING 0x008000 /* Disable logging */ #define WT_BTREE_NO_RECONCILE 0x010000 /* Allow splits, even with no evict */ diff --git a/src/include/btree.i b/src/include/btree.i index cec6f67e9bd..c0c5c7c5a8d 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -149,7 +149,7 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) if (WT_PAGE_IS_INTERNAL(page)) { (void)__wt_atomic_add64(&btree->bytes_dirty_intl, size); (void)__wt_atomic_add64(&cache->bytes_dirty_intl, size); - } else if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { + } else if (!btree->lsm_primary) { (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); } @@ -285,7 +285,7 @@ __wt_cache_page_byte_dirty_decr( decr, "WT_BTREE.bytes_dirty_intl"); __wt_cache_decr_check_uint64(session, &cache->bytes_dirty_intl, decr, 
"WT_CACHE.bytes_dirty_intl"); - } else if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { + } else if (!btree->lsm_primary) { __wt_cache_decr_check_uint64(session, &btree->bytes_dirty_leaf, decr, "WT_BTREE.bytes_dirty_leaf"); __wt_cache_decr_check_uint64(session, &cache->bytes_dirty_leaf, @@ -345,7 +345,7 @@ __wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page) (void)__wt_atomic_add64(&cache->bytes_dirty_intl, size); (void)__wt_atomic_add64(&cache->pages_dirty_intl, 1); } else { - if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { + if (!btree->lsm_primary) { (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); } @@ -444,7 +444,7 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) __wt_cache_decr_zero_uint64(session, &cache->bytes_dirty_intl, modify->bytes_dirty, "WT_CACHE.bytes_dirty_intl"); - } else if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { + } else if (!btree->lsm_primary) { __wt_cache_decr_zero_uint64(session, &btree->bytes_dirty_leaf, modify->bytes_dirty, "WT_BTREE.bytes_dirty_leaf"); @@ -1545,58 +1545,6 @@ __wt_btree_lsm_over_size(WT_SESSION_IMPL *session, uint64_t maxsize) return (child->memory_footprint > maxsize); } -/* - * __wt_btree_lsm_switch_primary -- - * Switch a btree handle to/from the current primary chunk of an LSM tree. 
- */ -static inline int -__wt_btree_lsm_switch_primary(WT_SESSION_IMPL *session, bool on) -{ - WT_BTREE *btree; - WT_CACHE *cache; - WT_PAGE *child, *root; - WT_PAGE_INDEX *pindex; - WT_REF *first; - size_t size; - - btree = S2BT(session); - cache = S2C(session)->cache; - root = btree->root.page; - - if (!F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { - F_SET(btree, WT_BTREE_LSM_PRIMARY); - WT_RET(__wt_evict_file_exclusive_on(session)); - } - if (!on && F_ISSET(btree, WT_BTREE_LSM_PRIMARY)) { - pindex = WT_INTL_INDEX_GET_SAFE(root); - if (btree->evict_disabled == 0 || pindex->entries != 1) - return (0); - first = pindex->index[0]; - - /* - * We're reaching down into the page without a hazard pointer, - * but that's OK because we know that no-eviction is set so the - * page can't disappear. - * - * While this tree was the primary, its dirty bytes were not - * included in the cache accounting. Fix that now before we - * open it up for eviction. - */ - child = first->page; - if (first->state == WT_REF_MEM && - child->type == WT_PAGE_ROW_LEAF && - __wt_page_is_modified(child)) { - size = child->modify->bytes_dirty; - (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); - (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); - } - - F_CLR(btree, WT_BTREE_LSM_PRIMARY); - __wt_evict_file_exclusive_off(session); - } - return (0); -} - /* * __wt_split_descent_race -- * Return if we raced with an internal page split when descending the tree. diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 77fa96ebdfd..bd1daaa6915 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -688,20 +688,29 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) { if (chunk != NULL && !F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && chunk->switch_txn == WT_TXN_NONE) { - clsm->primary_chunk = chunk; primary = clsm->chunks[clsm->nchunks - 1]->cursor; + btree = ((WT_CURSOR_BTREE *)primary)->btree; + /* - * Disable eviction for the in-memory chunk. 
Also clear the - * bulk load flag here, otherwise eviction will be enabled by - * the first update. + * If the primary is not yet set as the primary, do that now. + * Note that eviction was configured off when the underlying + * object was created, which is what we want, leave it alone. + * + * We don't have to worry about races here: every thread that + * modifies the tree will have to come through here, at worse + * we set the flag repeatedly. We don't use a WT_BTREE handle + * flag, however, we could race doing the read-modify-write of + * the flags field. + * + * If something caused the chunk to be closed and reopened + * since it was created, we can no longer use it as a primary + * chunk and we need to force a switch. We detect the tree was + * created when it was opened by checking the "original" flag. */ - btree = ((WT_CURSOR_BTREE *)(primary))->btree; - if (btree->bulk_load_ok) { - btree->bulk_load_ok = false; - WT_WITH_BTREE(session, btree, - ret = __wt_btree_lsm_switch_primary(session, true)); - WT_ERR(ret); - } + if (!btree->lsm_primary && btree->original) + btree->lsm_primary = true; + if (btree->lsm_primary) + clsm->primary_chunk = chunk; } clsm->dsk_gen = lsm_tree->dsk_gen; diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index c9c350c5ac9..0b0801a8cca 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -255,6 +255,51 @@ err: return (ret); } +/* + * __lsm_switch_primary_off -- + * Switch when a btree handle is no longer the current primary chunk of + * an LSM tree. + */ +static void +__lsm_switch_primary_off(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + WT_CACHE *cache; + WT_PAGE *child, *root; + WT_PAGE_INDEX *pindex; + WT_REF *first; + size_t size; + + btree = S2BT(session); + cache = S2C(session)->cache; + root = btree->root.page; + pindex = WT_INTL_INDEX_GET_SAFE(root); + + /* Diagnostic: assert we've never split. 
*/ + WT_ASSERT(session, pindex->entries == 1); + + /* + * We're reaching down into the page without a hazard pointer, + * but that's OK because we know that no-eviction is set so the + * page can't disappear. + * + * While this tree was the primary, its dirty bytes were not + * included in the cache accounting. Fix that now before we + * open it up for eviction. + */ + first = pindex->index[0]; + child = first->page; + if (first->state == WT_REF_MEM && + child->type == WT_PAGE_ROW_LEAF && __wt_page_is_modified(child)) { + size = child->modify->bytes_dirty; + (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); + (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); + } + + /* Configure eviction. */ + __wt_evict_file_exclusive_off(session); +} + /* * __wt_lsm_checkpoint_chunk -- * Flush a single LSM chunk to disk. @@ -263,11 +308,12 @@ int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) { + WT_BTREE *btree; WT_DECL_RET; WT_TXN_ISOLATION saved_isolation; - bool flush_set; + bool flush_set, release_btree; - flush_set = false; + flush_set = release_btree = false; /* * If the chunk is already checkpointed, make sure it is also evicted. @@ -318,20 +364,18 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, * We can wait here for checkpoints and fsyncs to complete, which can * take a long time. */ - if ((ret = __wt_session_get_btree( - session, chunk->uri, NULL, NULL, 0)) == 0) { - /* - * Set read-uncommitted: we have already checked that all of the - * updates in this chunk are globally visible, use the cheapest - * possible check in reconciliation. 
- */ - saved_isolation = session->txn.isolation; - session->txn.isolation = WT_ISO_READ_UNCOMMITTED; - ret = __wt_cache_op(session, WT_SYNC_WRITE_LEAVES); - session->txn.isolation = saved_isolation; - WT_TRET(__wt_session_release_btree(session)); - } - WT_ERR(ret); + WT_ERR(__wt_session_get_btree(session, chunk->uri, NULL, NULL, 0)); + release_btree = true; + + /* + * Set read-uncommitted: we have already checked that all of the updates + * in this chunk are globally visible, use the cheapest possible check + * in reconciliation. + */ + saved_isolation = session->txn.isolation; + session->txn.isolation = WT_ISO_READ_UNCOMMITTED; + WT_ERR(__wt_cache_op(session, WT_SYNC_WRITE_LEAVES)); + session->txn.isolation = saved_isolation; __wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointing %s", chunk->uri); @@ -348,12 +392,28 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_ERR(__wt_meta_track_on(session)); WT_WITH_CHECKPOINT_LOCK(session, WT_WITH_SCHEMA_LOCK(session, - ret = __wt_schema_worker( - session, chunk->uri, __wt_checkpoint, NULL, NULL, 0))); + ret = __wt_checkpoint(session, NULL))); WT_TRET(__wt_meta_track_off(session, false, ret != 0)); if (ret != 0) WT_ERR_MSG(session, ret, "LSM checkpoint"); + /* + * If the chunk is the lsm primary, clear the no-eviction flag so it can + * be evicted and eventually closed. Only do once, and only do after the + * checkpoint has succeeded: otherwise, accessing the leaf page during + * the checkpoint can trigger forced eviction. + * + * We don't have to worry about races here, we're single-threaded. + */ + btree = S2BT(session); + if (btree->lsm_primary) { + __lsm_switch_primary_off(session); + btree->lsm_primary = false; + } + + release_btree = false; + WT_ERR(__wt_session_release_btree(session)); + /* Now the file is written, get the chunk size. 
*/ WT_ERR(__wt_lsm_tree_set_chunk_size(session, chunk)); @@ -376,17 +436,6 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_PUBLISH(chunk->flushing, 0); flush_set = false; - /* - * Clear the no-eviction flag so the primary can be evicted and - * eventually closed. Only do this once the checkpoint has succeeded: - * otherwise, accessing the leaf page during the checkpoint can trigger - * forced eviction. - */ - WT_ERR(__wt_session_get_btree(session, chunk->uri, NULL, NULL, 0)); - WT_TRET(__wt_btree_lsm_switch_primary(session, false)); - WT_TRET(__wt_session_release_btree(session)); - WT_ERR(ret); - /* Make sure we aren't pinning a transaction ID. */ __wt_txn_release_snapshot(session); @@ -403,6 +452,8 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, err: if (flush_set) WT_PUBLISH(chunk->flushing, 0); + if (release_btree) + WT_TRET(__wt_session_release_btree(session)); return (ret); } diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index a667a288187..88d4397fcb5 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -3583,11 +3583,12 @@ __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) uint64_t recno; btree = S2BT(session); + /* * Bulk-load is only permitted on newly created files, not any empty * file -- see the checkpoint code for a discussion. */ - if (!btree->bulk_load_ok) + if (!btree->original) WT_RET_MSG(session, EINVAL, "bulk-load is only possible for newly created trees"); diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index ec150f39fc5..80cdf1cd39b 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -1420,7 +1420,7 @@ __checkpoint_tree( * delete a physical checkpoint, and that will end in tears. 
*/ if (is_checkpoint) - if (btree->bulk_load_ok) { + if (btree->original) { fake_ckpt = true; goto fake; } -- cgit v1.2.1 From 6a3ee4ea9986ff2a7446c4774b04423673165c57 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 16 Mar 2017 11:42:07 -0400 Subject: WT-3225 WiredTiger won't build with clang on CentOS 7.3.1611 (#3333) * WT-3225 WiredTiger won't build with clang on CentOS 7.3.1611 Casting the call's return to int is because CentOS 7.3.1611 complains about syscall returning a long and the loss of integer precision in the assignment to ret. The cast should be a no-op everywhere. * On Centos 7.3.1611, system header files aren't compatible with -Wdisabled-macro-expansion. I don't see a big reason for having that warning, so I'm turning it off generally. Add -Wuninitialized to WiredTiger's gcc builds. --- build_posix/aclocal/strict.m4 | 5 +++++ dist/s_string.ok | 2 ++ src/include/os.h | 8 +++++++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/build_posix/aclocal/strict.m4 b/build_posix/aclocal/strict.m4 index c107dd017d7..659867fa69e 100644 --- a/build_posix/aclocal/strict.m4 +++ b/build_posix/aclocal/strict.m4 @@ -31,6 +31,7 @@ AC_DEFUN([AM_GCC_WARNINGS], [ w="$w -Wstrict-prototypes" w="$w -Wswitch-enum" w="$w -Wundef" + w="$w -Wuninitialized" w="$w -Wunreachable-code" w="$w -Wunsafe-loop-optimizations" w="$w -Wunused" @@ -66,6 +67,10 @@ AC_DEFUN([AM_CLANG_WARNINGS], [ # w="$w -Wno-error=cast-qual" w="$w -Wno-cast-qual" + # On Centos 7.3.1611, system header files aren't compatible with + # -Wdisabled-macro-expansion. 
+ w="$w -Wno-disabled-macro-expansion" + case "$1" in *Apple*clang*version*4.1*) # Apple clang has its own numbering system, and older OS X diff --git a/dist/s_string.ok b/dist/s_string.ok index cdfa4aec968..39b6b163cd9 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -68,6 +68,7 @@ CURSORs CURSTD CallsCustDate Castagnoli +CentOS Checkpointing Checksum Checksums @@ -1148,6 +1149,7 @@ sw sx sy sys +syscall sz t's tV diff --git a/src/include/os.h b/src/include/os.h index 7a8e47ed81f..8505649a1fd 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -11,8 +11,14 @@ * A call returning 0 indicates success; any call where \ * 0 is not the only successful return must provide an \ * expression evaluating to 0 in all successful cases. \ + * \ + * XXX \ + * Casting the call's return to int is because CentOS 7.3.1611 \ + * complains about syscall returning a long and the loss of \ + * integer precision in the assignment to ret. The cast should \ + * be a no-op everywhere. \ */ \ - if (((ret) = (call)) == 0) \ + if (((ret) = (int)(call)) == 0) \ break; \ /* \ * The call's error was either returned by the call or \ -- cgit v1.2.1 From c9b353c33631725e633e146f87c1c92e32a5def3 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Thu, 16 Mar 2017 14:38:05 -0400 Subject: WT-3218 Reduce to 2k tables so Jenkins doesn't hit open file ulimit. 
(#3334) --- bench/wtperf/runners/many-table-stress.wtperf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench/wtperf/runners/many-table-stress.wtperf b/bench/wtperf/runners/many-table-stress.wtperf index 51d0bb0dd9d..6cf1d5d2696 100644 --- a/bench/wtperf/runners/many-table-stress.wtperf +++ b/bench/wtperf/runners/many-table-stress.wtperf @@ -1,7 +1,7 @@ # Create a set of tables with uneven distribution of data conn_config="cache_size=1G,eviction=(threads_max=8),file_manager=(close_idle_time=100000),checkpoint=(wait=20,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000" table_config="type=file" -table_count=5000 +table_count=2000 icount=0 random_range=1000000000 pareto=10 -- cgit v1.2.1 From 4e47a53801a7bd54e323d9899905a69340ed8dfb Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Thu, 16 Mar 2017 23:45:17 -0400 Subject: WT-3212 Table cursors should not free memory owned by the table. (#3327) --- src/cursor/cur_table.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c index 98dbbec8981..72eec177449 100644 --- a/src/cursor/cur_table.c +++ b/src/cursor/cur_table.c @@ -1015,11 +1015,15 @@ __wt_curtable_open(WT_SESSION_IMPL *session, if (0) { err: if (*cursorp != NULL) { - if (*cursorp != cursor) - WT_TRET(__wt_cursor_close(*cursorp)); + /* + * When a dump cursor is opened, then *cursorp, not + * cursor, is the dump cursor. Close the dump cursor, + * and the table cursor will be closed as its child. + */ + cursor = *cursorp; *cursorp = NULL; } - WT_TRET(__curtable_close(cursor)); + WT_TRET(cursor->close(cursor)); } __wt_scr_free(session, &tmp); -- cgit v1.2.1 From 65ab67ed8d9777285dedf89cc506b9cffc52942e Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Thu, 16 Mar 2017 23:47:18 -0400 Subject: WT-2978 Python: make a pip-compatible installer. (#3320) * Build a static library with -fPIC objects, suitable for pulling into a dynamic library. 
Distribute our SWIG results, rather than running SWIG on the target machine. * Added builtin support for snappy and zlib. Made it easy to manage the list of builtins. --- lang/python/setup_pip.py | 408 +++++++++++++++++++++++++++++++++++++ lang/python/wiredtiger/pip_init.py | 48 +++++ 2 files changed, 456 insertions(+) create mode 100644 lang/python/setup_pip.py create mode 100644 lang/python/wiredtiger/pip_init.py diff --git a/lang/python/setup_pip.py b/lang/python/setup_pip.py new file mode 100644 index 00000000000..636eecab80a --- /dev/null +++ b/lang/python/setup_pip.py @@ -0,0 +1,408 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# + +# This script builds a Python source distribution that can built be installed +# via pip install. This must be run in a git repository to determine the files +# to package. Also as a prerequisite, SWIG must be run as the generated files +# are part of the package. To create the distribution, in this directory, run +# "python setup_pip.py sdist", this creates a tar.gz file under ./dist . +from __future__ import print_function +import os, os.path, re, shutil, site, sys +from setuptools import setup, Distribution +from distutils.extension import Extension +import distutils.sysconfig +import distutils.ccompiler +from distutils.errors import CompileError, LinkError +import subprocess +from subprocess import call +import setuptools.command.install +import setuptools.command.build_ext + +# msg -- +# Print a message to stderr. +def msg(s): + print(os.path.basename(__file__) + ": " + s, file=sys.stderr) + +# die -- +# For failures, show a message and exit. +def die(s): + msg(s) + sys.exit(1) + +# build_commands -- +# Run a sequence of commands, and die if any fail. +def build_commands(commands, build_dir, build_env): + for command in commands: + callargs = [ 'sh', '-c', command ] + verbose_command = '"' + '" "'.join(callargs) + '"' + print('running: ' + verbose_command) + if call(callargs, cwd=build_dir, env=build_env) != 0: + die('build command failed: ' + verbose_command) + +# check_needed_dependencies -- +# Make a quick check of any needed library dependencies, and +# add to the library path and include path as needed. If a library +# is not found, it is not definitive. 
+def check_needed_dependencies(builtins, inc_paths, lib_paths): + library_dirs = get_library_dirs() + compiler = distutils.ccompiler.new_compiler() + distutils.sysconfig.customize_compiler(compiler) + compiler.set_library_dirs(library_dirs) + missing = [] + for name, libname, instructions in builtins: + found = compiler.find_library_file(library_dirs, libname) + if found is None: + msg(libname + ": missing") + msg(instructions) + msg("after installing it, set LD_LIBRARY_PATH or DYLD_LIBRARY_PATH") + missing.append(libname) + else: + package_top = os.path.dirname(os.path.dirname(found)) + inc_paths.append(os.path.join(package_top, 'include')) + lib_paths.append(os.path.join(package_top, 'lib')) + + # XXX: we are not accounting for other directories that might be + # discoverable via /sbin/ldconfig. It might be better to write a tiny + # compile using -lsnappy, -lz... + # + #if len(missing) > 0: + # die("install packages for: " + str(missing)) + +# find_executable -- +# Locate an executable in the PATH. +def find_executable(exename, path): + p = subprocess.Popen(['which', exename ], stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + out, err = p.communicate('') + out = str(out) # needed for Python3 + if out == '': + if err != '': + err = ': "' + err + '"' + die('"' + exename + '": not found in path' + err) + dirname = os.path.dirname(out) + if not dirname in path: + path.append(dirname) + +# get_build_path -- +# Create a PATH that can be used for installation. Apparently, +# installation commands are run with a restricted PATH, and +# autoreconf/aclocal will not normally be found. +def get_build_path(): + build_paths = [] + find_executable('autoreconf', build_paths) + find_executable('aclocal', build_paths) + build_path = os.environ['PATH'] + ':' + ':'.join(build_paths) + return build_path + +# get_compile_flags -- +# Get system specific compile flags. Return a triple: C preprocessor +# flags, C compilation flags and linker flags. 
+def get_compile_flags(inc_paths, lib_paths): + # Suppress warnings building SWIG generated code + if sys.platform == 'win32' and cc == 'msvc': + cflags = ['/arch:SSE2', '/EHsc'] + cppflags = [] + ldflags = [] + # Windows untested and incomplete, don't claim that it works. + die('Windows is not supported by this setup script') + else: + cflags = [ '-w', '-Wno-sign-conversion', '-std=c11' ] + cppflags = ['-I' + path for path in inc_paths] + cppflags.append('-DHAVE_CONFIG_H') + ldflags = ['-L' + path for path in lib_paths] + if sys.platform == 'darwin': + cflags.extend([ '-arch', 'x86_64' ]) + return (cppflags, cflags, ldflags) + +# get_sources_curdir -- +# Get a list of sources from the current directory +def get_sources_curdir(): + DEVNULL = open(os.devnull, 'w') + gitproc = subprocess.Popen( + ['git', 'ls-tree', '-r', '--name-only', 'HEAD^{tree}'], + stdin=DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + sources = [line.rstrip() for line in gitproc.stdout.readlines()] + err = gitproc.stderr.read() + gitproc.wait() + subret = gitproc.returncode + if subret != 0 or err: + msg("git command to get sources returned " + str(subret) + + ", error=" + str(err)) + die("this command must be run in a git repository") + return sources + +# get_wiredtiger_versions -- +# Read the version information from the RELEASE_INFO file. +def get_wiredtiger_versions(wt_dir): + v = {} + for l in open(os.path.join(wt_dir, 'RELEASE_INFO')): + if re.match(r'WIREDTIGER_VERSION_(?:MAJOR|MINOR|PATCH)=', l): + exec(l, v) + wt_ver = '%d.%d' % (v['WIREDTIGER_VERSION_MAJOR'], + v['WIREDTIGER_VERSION_MINOR']) + wt_full_ver = wt_ver + '.%d' % (v['WIREDTIGER_VERSION_PATCH']) + return (wt_ver, wt_full_ver) + +# get_library_dirs +# Build a plausible set of library directories. 
+def get_library_dirs(): + dirs = [] + dirs.append("/usr/local/lib") + dirs.append("/usr/local/lib64") + dirs.append("/lib/x86_64-linux-gnu") + dirs.append("/opt/local/lib") + dirs.append("/usr/lib") + dirs.append("/usr/lib64") + for path in ['LD_LIBRARY_PATH', 'DYLD_LIBRARY_PATH', 'LIBRARY_PATH']: + if path in os.environ: + dirs.extend(os.environ[path].split(':')) + dirs = list(set(filter(os.path.isdir, dirs))) + return dirs + +# source_filter +# Make any needed changes to the sources list. Any entry that +# needs to be moved is returned in a dictionary. +def source_filter(sources): + result = [] + movers = dict() + py_dir = os.path.join('lang', 'python') + pywt_dir = os.path.join(py_dir, 'wiredtiger') + pywt_prefix = pywt_dir + os.path.sep + for f in sources: + if not re.match(source_regex, f): + continue + src = f + dest = f + # move all lang/python files to the top level. + if dest.startswith(pywt_prefix): + dest = os.path.basename(dest) + if dest == 'pip_init.py': + dest = '__init__.py' + if dest != src: + movers[dest] = src + result.append(dest) + # Add SWIG generated files + result.append('wiredtiger.py') + movers['wiredtiger.py'] = os.path.join(pywt_dir, '__init__.py') + result.append(os.path.join(py_dir, 'wiredtiger_wrap.c')) + return result, movers + +################################################################ +# Do some initial setup and checks. 
+this_abs_script = os.path.abspath(__file__) +this_dir = os.path.dirname(this_abs_script) +pip_command = None +for arg in sys.argv[1:]: + if arg[0] != '-' and pip_command == None: + pip_command = arg + break + +if this_dir.endswith(os.sep + os.path.join('lang', 'python')): + wt_dir = os.path.dirname(os.path.dirname(this_dir)) + os.chdir(wt_dir) +elif os.path.isfile(os.path.join(this_dir, 'LICENSE')): + wt_dir = this_dir +else: + die('running from an unknown directory') + +python3 = (sys.version_info[0] > 2) +if python3: + die('Python3 is not yet supported') + +# Ensure that Extensions won't be built for 32 bit, +# that won't work with WiredTiger. +if sys.maxsize < 2**32: + die('need to be running on a 64 bit system, and have a 64 bit Python') + +python_rel_dir = os.path.join('lang', 'python') +build_dir = os.path.join(wt_dir, 'build_posix') +makefile = os.path.join(build_dir, 'Makefile') +built_sentinal = os.path.join(build_dir, 'built.txt') +conf_make_dir = 'build_posix' +wt_swig_lib_name = os.path.join(python_rel_dir, '_wiredtiger.so') + +################################################################ +# Put together build options for the WiredTiger extension. +short_description = 'high performance, scalable, production quality, ' + \ + 'NoSQL, Open Source extensible platform for data management' +long_description = 'WiredTiger is a ' + short_description + '.\n\n' + \ + open(os.path.join(wt_dir, 'README')).read() + +wt_ver, wt_full_ver = get_wiredtiger_versions(wt_dir) +build_path = get_build_path() + +# We only need a small set of directories to build a WT library, +# we also include any files at the top level. +source_regex = r'^(?:(?:api|build_posix|ext|lang/python|src|dist)/|[^/]*$)' + +# The builtins that we include in this distribution. 
+builtins = [ + # [ name, libname, instructions ] + [ 'snappy', 'snappy', + 'Note: a suitable version of snappy can be found at\n' + \ + ' https://github.com/google/snappy/releases/download/' + \ + '1.1.3/snappy-1.1.3.tar.gz\n' + \ + 'It can be installed via: yum install snappy snappy-devel\n' + \ + 'or via: apt-get install libsnappy-dev' ], + [ 'zlib', 'z', + 'Need to install zlib\n' + \ + 'It can be installed via: apt-get install zlib1g' ] +] +builtin_names = [b[0] for b in builtins] +builtin_libraries = [b[1] for b in builtins] + +# Here's the configure/make operations we perform before the python extension +# is linked. +configure_cmds = [ + './makemake --clean-and-make', + './reconf', + # force building a position independent library; it will be linked + # into a single shared library with the SWIG interface code. + 'CFLAGS="${CFLAGS:-} -fPIC -DPIC" ' + \ + '../configure --enable-python --with-builtins=' + ','.join(builtin_names) +] + +# build all the builtins, at the moment they are all compressors. +make_cmds = [] +for name in builtin_names: + make_cmds.append('(cd ext/compressors/' + name + '/; make)') +make_cmds.append('make libwiredtiger.la') + +inc_paths = [ os.path.join(build_dir, 'src', 'include'), build_dir, '.' ] +lib_paths = [ '.' ] # wiredtiger.so is moved into the top level directory + +check_needed_dependencies(builtins, inc_paths, lib_paths) + +cppflags, cflags, ldflags = get_compile_flags(inc_paths, lib_paths) + +# If we are creating a source distribution, create a staging directory +# with just the right sources. Put the result in the python dist directory. 
+if pip_command == 'sdist': + sources, movers = source_filter(get_sources_curdir()) + stage_dir = os.path.join(python_rel_dir, 'stage') + shutil.rmtree(stage_dir, True) + os.makedirs(stage_dir) + shutil.copy2(this_abs_script, os.path.join(stage_dir, 'setup.py')) + for f in sources: + d = os.path.join(stage_dir, os.path.dirname(f)) + if not os.path.isdir(d): + os.makedirs(d) + if f in movers: + src = movers[f] + else: + src = f + # Symlinks are not followed in setup, we need to use real files. + shutil.copy2(src, os.path.join(stage_dir, f)) + os.chdir(stage_dir) + sys.argv.append('--dist-dir=' + os.path.join('..', 'dist')) +else: + sources = [ os.path.join(python_rel_dir, 'wiredtiger_wrap.c') ] + +wt_ext = Extension('_wiredtiger', + sources = sources, + extra_compile_args = cflags + cppflags, + extra_link_args = ldflags, + libraries = builtin_libraries, + extra_objects = [ os.path.join(build_dir, '.libs', 'libwiredtiger.a') ], + include_dirs = inc_paths, + library_dirs = lib_paths, +) +extensions = [ wt_ext ] +env = { "CFLAGS" : ' '.join(cflags), + "CPPFLAGS" : ' '.join(cppflags), + "LDFLAGS" : ' '.join(ldflags), + "PATH" : build_path } + +class BinaryDistribution(Distribution): + def is_pure(self): + return False + +class WTInstall(setuptools.command.install.install): + def run(self): + self.run_command("build_ext") + return setuptools.command.install.install.run(self) + +class WTBuildExt(setuptools.command.build_ext.build_ext): + def __init__(self, *args, **kwargs): + setuptools.command.build_ext.build_ext.__init__(self, *args, **kwargs) + + def run(self): + # only run this once + if not os.path.isfile(built_sentinal): + try: + os.remove(makefile) + except OSError: + pass + self.execute( + lambda: build_commands(configure_cmds, conf_make_dir, env), [], + 'wiredtiger configure') + if not os.path.isfile(makefile): + die('configure failed, file does not exist: ' + makefile) + self.execute( + lambda: build_commands(make_cmds, conf_make_dir, env), [], + 'wiredtiger 
make') + open(built_sentinal, 'a').close() + return setuptools.command.build_ext.build_ext.run(self) + +setup( + name = 'wiredtiger', + version = wt_full_ver, + author = 'The WiredTiger Development Team, part of MongoDB', + author_email = 'info@wiredtiger.com', + description = short_description, + license='GPL2,GPL3,Commercial', + long_description = long_description, + url = 'http://source.wiredtiger.com/', + keywords = 'scalable NoSQL database datastore engine open source', + packages = ['wiredtiger'], + ext_package = 'wiredtiger', + ext_modules = extensions, + include_package_data = True, + distclass = BinaryDistribution, + package_dir = { 'wiredtiger' : '.' }, + cmdclass = { 'install': WTInstall, 'build_ext': WTBuildExt }, + package_data = { + 'wiredtiger' : [ wt_swig_lib_name, '*.py' ] + }, + classifiers=[ + 'Intended Audience :: Developers', + 'Programming Language :: C', + 'Programming Language :: C++', + 'Programming Language :: Python', + 'Programming Language :: Java', + 'Operating System :: MacOS :: MacOS X', + 'Operating System :: POSIX', + 'Operating System :: POSIX :: BSD', + 'Operating System :: POSIX :: Linux', + 'Operating System :: POSIX :: SunOS/Solaris', + ] +) + +if pip_command == 'sdist': + shutil.rmtree(os.path.join(this_dir, 'stage')) diff --git a/lang/python/wiredtiger/pip_init.py b/lang/python/wiredtiger/pip_init.py new file mode 100644 index 00000000000..d59c8218976 --- /dev/null +++ b/lang/python/wiredtiger/pip_init.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. 
+# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +# pip_init.py +# This is installed as __init__.py, and imports the file created by SWIG. +# This is needed because SWIG's import helper code created by certain SWIG +# versions may be broken, see: https://github.com/swig/swig/issues/769 . +# Importing indirectly seems to avoid these issues. +import os, sys +fname = os.path.basename(__file__) +if fname != '__init__.py' and fname != '__init__.pyc': + print(__file__ + ': this file is not yet installed') + sys.exit(1) + +# After importing the SWIG-generated file, copy all symbols from it +# to this module so they will appear in the wiredtiger namespace. 
+me = sys.modules[__name__] +sys.path.append(os.path.dirname(__file__)) # needed for Python3 +import wiredtiger +for name in dir(wiredtiger): + value = getattr(wiredtiger, name) + setattr(me, name, value) -- cgit v1.2.1 From 360b43b33170a89587a737988477d0619008ec2a Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 16 Mar 2017 23:50:25 -0400 Subject: WT-3216 changes suggested by clang-tidy (#3328) --- dist/api_err.py | 2 +- src/async/async_api.c | 10 ++-- src/block/block_addr.c | 2 +- src/block/block_ext.c | 11 +++-- src/block/block_read.c | 2 +- src/block/block_vrfy.c | 2 +- src/btree/bt_cursor.c | 12 +---- src/btree/bt_debug.c | 2 +- src/btree/bt_io.c | 9 ++-- src/btree/bt_read.c | 6 ++- src/btree/bt_split.c | 6 +-- src/btree/bt_sync.c | 6 ++- src/btree/bt_vrfy_dsk.c | 8 ++-- src/config/config_api.c | 10 ++-- src/conn/conn_sweep.c | 2 +- src/cursor/cur_join.c | 5 +- src/cursor/cur_json.c | 87 +++++++++++++++++----------------- src/cursor/cur_metadata.c | 4 +- src/cursor/cur_stat.c | 1 - src/cursor/cur_table.c | 4 +- src/evict/evict_lru.c | 12 +++-- src/include/api.h | 11 +++-- src/include/bitstring.i | 2 +- src/include/btmem.h | 16 +++---- src/include/cell.i | 16 +++---- src/include/column.i | 4 +- src/include/connection.h | 4 +- src/include/cursor.h | 8 ++-- src/include/dhandle.h | 12 ++--- src/include/intpack.i | 30 ++++++------ src/include/lint.h | 14 +++--- src/include/log.h | 10 ++-- src/include/misc.h | 18 +++---- src/include/mutex.i | 4 +- src/include/os.h | 2 +- src/include/packing.i | 44 ++++++++--------- src/include/schema.h | 7 +-- src/include/session.h | 2 +- src/include/stat.h | 2 +- src/include/wiredtiger.in | 18 +++---- src/log/log.c | 14 ++---- src/lsm/lsm_cursor.c | 13 ++++-- src/lsm/lsm_merge.c | 2 +- src/lsm/lsm_work_unit.c | 8 ++-- src/reconcile/rec_write.c | 17 ++----- src/support/crypto.c | 1 - src/txn/txn_ckpt.c | 1 - src/txn/txn_recover.c | 2 +- src/utilities/util_dump.c | 11 +++-- src/utilities/util_load.c | 4 +- 
src/utilities/util_main.c | 117 ++++++++++++++++++++++++---------------------- 51 files changed, 304 insertions(+), 313 deletions(-) diff --git a/dist/api_err.py b/dist/api_err.py index 82f961a4ac9..bd379ac8d70 100644 --- a/dist/api_err.py +++ b/dist/api_err.py @@ -82,7 +82,7 @@ for line in open('../src/include/wiredtiger.in', 'r'): ''.join('\n * ' + l for l in textwrap.wrap( textwrap.dedent(err.long_desc).strip(), 77)) + '\n' if err.long_desc else '')) - tfile.write('#define\t%s\t%d\n' % (err.name, err.value)) + tfile.write('#define\t%s\t(%d)\n' % (err.name, err.value)) if 'undoc' in err.flags: tfile.write('/*! @endcond */\n') tfile.write('/*\n') diff --git a/src/async/async_api.c b/src/async/async_api.c index 026a008188c..b9cc995f5a5 100644 --- a/src/async/async_api.c +++ b/src/async/async_api.c @@ -338,17 +338,15 @@ __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) * 2. If async is off, and the user wants it on, start it. * 3. If not a toggle and async is off, we're done. 
*/ - if (conn->async_cfg && !run) { - /* Case 1 */ + if (conn->async_cfg && !run) { /* Case 1 */ WT_TRET(__wt_async_flush(session)); ret = __wt_async_destroy(session); conn->async_cfg = false; return (ret); - } else if (!conn->async_cfg && run) - /* Case 2 */ + } + if (!conn->async_cfg && run) /* Case 2 */ return (__async_start(session)); - else if (!conn->async_cfg) - /* Case 3 */ + if (!conn->async_cfg) /* Case 3 */ return (0); /* diff --git a/src/block/block_addr.c b/src/block/block_addr.c index 580316bdfc6..a67efca62a3 100644 --- a/src/block/block_addr.c +++ b/src/block/block_addr.c @@ -226,7 +226,7 @@ __wt_block_ckpt_to_buffer(WT_SESSION_IMPL *session, ci->discard.offset, ci->discard.size, ci->discard.checksum)); a = (uint64_t)ci->file_size; WT_RET(__wt_vpack_uint(pp, 0, a)); - a = (uint64_t)ci->ckpt_size; + a = ci->ckpt_size; WT_RET(__wt_vpack_uint(pp, 0, a)); return (0); diff --git a/src/block/block_ext.c b/src/block/block_ext.c index 26acc8c560f..e9357d73d1d 100644 --- a/src/block/block_ext.c +++ b/src/block/block_ext.c @@ -634,11 +634,11 @@ __wt_block_off_free( */ if ((ret = __wt_block_off_remove_overlap( session, block, &block->live.alloc, offset, size)) == 0) - ret = __block_merge(session, block, - &block->live.avail, offset, (wt_off_t)size); + ret = __block_merge( + session, block, &block->live.avail, offset, size); else if (ret == WT_NOTFOUND) - ret = __block_merge(session, block, - &block->live.discard, offset, (wt_off_t)size); + ret = __block_merge( + session, block, &block->live.discard, offset, size); return (ret); } @@ -1247,7 +1247,8 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_DECL_RET; WT_EXT *ext; WT_PAGE_HEADER *dsk; - size_t entries, size; + uint32_t entries; + size_t size; uint8_t *p; WT_RET(__block_extlist_dump(session, block, el, "write")); diff --git a/src/block/block_read.c b/src/block/block_read.c index 869a92b6ae1..8d4aec7df75 100644 --- a/src/block/block_read.c +++ b/src/block/block_read.c @@ -39,7 +39,7 @@ 
__wt_bm_preload( (uint8_t *)bm->map + offset, size, bm->mapped_cookie); if (!mapped && handle->fh_advise != NULL) ret = handle->fh_advise(handle, (WT_SESSION *)session, - (wt_off_t)offset, (wt_off_t)size, WT_FILE_HANDLE_WILLNEED); + offset, (wt_off_t)size, WT_FILE_HANDLE_WILLNEED); if (ret != EBUSY && ret != ENOTSUP) return (ret); diff --git a/src/block/block_vrfy.c b/src/block/block_vrfy.c index 94824ad19f8..154765ed079 100644 --- a/src/block/block_vrfy.c +++ b/src/block/block_vrfy.c @@ -22,7 +22,7 @@ static int __verify_set_file_size(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *); ((off) / (block)->allocsize - 1) #ifdef HAVE_VERBOSE #define WT_FRAG_TO_OFF(block, frag) \ - (((wt_off_t)(frag + 1)) * (block)->allocsize) + (((wt_off_t)((frag) + 1)) * (block)->allocsize) #endif /* diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index d6dc0991d3f..48ae1ad6d76 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -1108,11 +1108,7 @@ retry: WT_RET(__wt_btcur_search(start)); WT_ASSERT(session, F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); - /* - * Reset ret each time through so that we don't loop forever in - * the cursor equals case. - */ - for (ret = 0;;) { + for (;;) { if ((ret = rmfunc(session, start, 1)) != 0) break; @@ -1176,11 +1172,7 @@ retry: WT_RET(__wt_btcur_search(start)); WT_ASSERT(session, F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); - /* - * Reset ret each time through so that we don't loop forever in - * the cursor equals case. 
- */ - for (ret = 0;;) { + for (;;) { value = (const uint8_t *)start->iface.value.data; if (*value != 0 && (ret = rmfunc(session, start, 1)) != 0) diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index d664da2ebd3..4989301468f 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -34,7 +34,7 @@ static const /* Output separator */ static int __debug_cell(WT_DBG *, const WT_PAGE_HEADER *, WT_CELL_UNPACK *); static int __debug_cell_data( - WT_DBG *, WT_PAGE *, int type, const char *, WT_CELL_UNPACK *); + WT_DBG *, WT_PAGE *, int, const char *, WT_CELL_UNPACK *); static int __debug_col_skip(WT_DBG *, WT_INSERT_HEAD *, const char *, bool); static int __debug_config(WT_SESSION_IMPL *, WT_DBG *, const char *); static int __debug_dsk_cell(WT_DBG *, const WT_PAGE_HEADER *); diff --git a/src/btree/bt_io.c b/src/btree/bt_io.c index a8645f79dbe..b5e4d52394a 100644 --- a/src/btree/bt_io.c +++ b/src/btree/bt_io.c @@ -183,7 +183,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t dst_len, len, result_len, size, src_len; int compression_failed; /* Extension API, so not a bool. */ uint8_t *dst, *src; - bool data_checksum, encrypted; + bool data_checksum, encrypted, timer; btree = S2BT(session); bm = btree->bm; @@ -216,7 +216,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, &result_len)); WT_ASSERT(session, dsk->mem_size == result_len + WT_BLOCK_COMPRESS_SKIP); - ctmp->size = (uint32_t)result_len + WT_BLOCK_COMPRESS_SKIP; + ctmp->size = result_len + WT_BLOCK_COMPRESS_SKIP; ip = ctmp; } else { WT_ASSERT(session, dsk->mem_size == buf->size); @@ -357,7 +357,8 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, data_checksum = !compressed; break; } - if (!F_ISSET(session, WT_SESSION_INTERNAL)) + timer = !F_ISSET(session, WT_SESSION_INTERNAL); + if (timer) __wt_epoch(session, &start); /* Call the block manager to write the block. 
*/ @@ -367,7 +368,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, bm, session, ip, addr, addr_sizep, data_checksum, checkpoint_io)); /* Update some statistics now that the write is done */ - if (!F_ISSET(session, WT_SESSION_INTERNAL)) { + if (timer) { __wt_epoch(session, &stop); WT_STAT_CONN_INCR(session, cache_write_app_count); WT_STAT_CONN_INCRV(session, cache_write_app_time, diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index e87ddc082f2..b170a9fb900 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -369,6 +369,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref) size_t addr_size; uint32_t previous_state; const uint8_t *addr; + bool timer; btree = S2BT(session); page = NULL; @@ -408,10 +409,11 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref) * There's an address, read or map the backing disk page and build an * in-memory version of the page. */ - if (!F_ISSET(session, WT_SESSION_INTERNAL)) + timer = !F_ISSET(session, WT_SESSION_INTERNAL); + if (timer) __wt_epoch(session, &start); WT_ERR(__wt_bt_read(session, &tmp, addr, addr_size)); - if (!F_ISSET(session, WT_SESSION_INTERNAL)) { + if (timer) { __wt_epoch(session, &stop); WT_STAT_CONN_INCR(session, cache_read_app_count); WT_STAT_CONN_INCRV(session, cache_read_app_time, diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 6b2100ec7e3..b1bad760826 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -10,8 +10,8 @@ #define WT_MEM_TRANSFER(from_decr, to_incr, len) do { \ size_t __len = (len); \ - from_decr += __len; \ - to_incr += __len; \ + (from_decr) += __len; \ + (to_incr) += __len; \ } while (0) /* @@ -119,7 +119,7 @@ __wt_split_stash_discard(WT_SESSION_IMPL *session) ++i, ++stash) { if (stash->p == NULL) continue; - else if (stash->split_gen >= oldest) + if (stash->split_gen >= oldest) break; /* * It's a bad thing if another thread is in this memory after diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index 7bf15baa67f..cdb27752fb7 100644 --- 
a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -78,6 +78,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages; uint64_t oldest_id, saved_pinned_id; uint32_t flags; + bool timer; conn = S2C(session); btree = S2BT(session); @@ -88,7 +89,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) internal_bytes = leaf_bytes = 0; internal_pages = leaf_pages = 0; - if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) + timer = WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT); + if (timer) __wt_epoch(session, &start); switch (syncop) { @@ -242,7 +244,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) break; } - if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) { + if (timer) { __wt_epoch(session, &end); __wt_verbose(session, WT_VERB_CHECKPOINT, "__sync_file WT_SYNC_%s wrote: %" PRIu64 diff --git a/src/btree/bt_vrfy_dsk.c b/src/btree/bt_vrfy_dsk.c index 3a6fd8261ba..a4071c44aee 100644 --- a/src/btree/bt_vrfy_dsk.c +++ b/src/btree/bt_vrfy_dsk.c @@ -203,7 +203,8 @@ __verify_dsk_row( WT_ITEM *last; enum { FIRST, WAS_KEY, WAS_VALUE } last_cell_type; void *huffman; - uint32_t cell_num, cell_type, i, key_cnt, prefix; + size_t prefix; + uint32_t cell_num, cell_type, i, key_cnt; uint8_t *end; int cmp; @@ -343,8 +344,9 @@ __verify_dsk_row( if (cell_num > 1 && prefix > last->size) WT_ERR_VRFY(session, "key %" PRIu32 " on page at %s has a prefix " - "compression count of %" PRIu32 ", larger than " - "the length of the previous key, %" WT_SIZET_FMT, + "compression count of %" WT_SIZET_FMT + ", larger than the length of the previous key, %" + WT_SIZET_FMT, cell_num, tag, prefix, last->size); /* diff --git a/src/config/config_api.c b/src/config/config_api.c index 05c5c1287a7..9f70ba65e9b 100644 --- a/src/config/config_api.c +++ b/src/config/config_api.c @@ -215,7 +215,7 @@ __wt_configure_method(WT_SESSION_IMPL *session, WT_CONFIG_ENTRY *entry; WT_CONNECTION_IMPL *conn; WT_DECL_RET; - size_t cnt; + 
size_t cnt, len; char *newcheck_name, *p; /* @@ -276,11 +276,9 @@ __wt_configure_method(WT_SESSION_IMPL *session, */ WT_ERR(__wt_calloc_one(session, &entry)); entry->method = (*epp)->method; - WT_ERR(__wt_calloc_def(session, - strlen((*epp)->base) + strlen(",") + strlen(config) + 1, &p)); - (void)strcpy(p, (*epp)->base); - (void)strcat(p, ","); - (void)strcat(p, config); + len = strlen((*epp)->base) + strlen(",") + strlen(config) + 1; + WT_ERR(__wt_calloc_def(session, len, &p)); + snprintf(p, len, "%s,%s", (*epp)->base, config); entry->base = p; /* diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index 8c186c63939..22d90b08438 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -10,7 +10,7 @@ #define WT_DHANDLE_CAN_DISCARD(dhandle) \ (!F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_OPEN) && \ - dhandle->session_inuse == 0 && dhandle->session_ref == 0) + (dhandle)->session_inuse == 0 && (dhandle)->session_ref == 0) /* * __sweep_mark -- diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 013a64ef2d5..8df8e201173 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -270,7 +270,7 @@ again: iter->positioned = true; return (ret); } - else if (ret == WT_NOTFOUND) { + if (ret == WT_NOTFOUND) { WT_RET(__curjoin_iter_close_all(iter->child)); entry->subjoin->iter = NULL; iter->child = NULL; @@ -518,8 +518,7 @@ __curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, } if (disjunction && end == endmax) return (WT_NOTFOUND); - else - return (0); + return (0); } typedef struct { diff --git a/src/cursor/cur_json.c b/src/cursor/cur_json.c index 5870d14273e..0ad3c4f4201 100644 --- a/src/cursor/cur_json.c +++ b/src/cursor/cur_json.c @@ -23,20 +23,20 @@ static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *, bool, const char *, size_t *); #define WT_PACK_JSON_GET(session, pv, jstr) do { \ - switch (pv.type) { \ + switch ((pv).type) { \ case 'x': \ break; \ case 's': \ case 'S': \ - 
WT_RET(json_string_arg(session, &jstr, &pv.u.item)); \ - pv.type = pv.type == 's' ? 'j' : 'J'; \ + WT_RET(json_string_arg(session, &(jstr), &(pv).u.item));\ + (pv).type = (pv).type == 's' ? 'j' : 'J'; \ break; \ case 'b': \ case 'h': \ case 'i': \ case 'l': \ case 'q': \ - WT_RET(json_int_arg(session, &jstr, &pv.u.i)); \ + WT_RET(json_int_arg(session, &(jstr), &(pv).u.i)); \ break; \ case 'B': \ case 'H': \ @@ -46,11 +46,11 @@ static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *, case 'r': \ case 'R': \ case 't': \ - WT_RET(json_uint_arg(session, &jstr, &pv.u.u)); \ + WT_RET(json_uint_arg(session, &(jstr), &(pv).u.u)); \ break; \ case 'u': \ - WT_RET(json_string_arg(session, &jstr, &pv.u.item)); \ - pv.type = 'K'; \ + WT_RET(json_string_arg(session, &(jstr), &(pv).u.item));\ + (pv).type = 'K'; \ break; \ /* User format strings have already been validated. */ \ WT_ILLEGAL_VALUE(session); \ @@ -304,7 +304,6 @@ __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor) __wt_free(session, json->value_buf); __wt_free(session, json); } - return; } /* @@ -323,33 +322,32 @@ __wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode) if (bufsz >= 1) *buf = ch; return (1); - } else { - abbrev = '\0'; - switch (ch) { - case '\\': - case '"': - abbrev = ch; - break; - case '\f': - abbrev = 'f'; - break; - case '\n': - abbrev = 'n'; - break; - case '\r': - abbrev = 'r'; - break; - case '\t': - abbrev = 't'; - break; - } - if (abbrev != '\0') { - if (bufsz >= 2) { - *buf++ = '\\'; - *buf = abbrev; - } - return (2); + } + abbrev = '\0'; + switch (ch) { + case '\\': + case '"': + abbrev = ch; + break; + case '\f': + abbrev = 'f'; + break; + case '\n': + abbrev = 'n'; + break; + case '\r': + abbrev = 'r'; + break; + case '\t': + abbrev = 't'; + break; + } + if (abbrev != '\0') { + if (bufsz >= 2) { + *buf++ = '\\'; + *buf = abbrev; } + return (2); } } if (bufsz >= 6) { @@ -421,16 +419,16 @@ __wt_json_column_init(WT_CURSOR *cursor, 
const char *uri, const char *keyformat, #define MATCH_KEYWORD(session, in, result, keyword, matchval) do { \ size_t _kwlen = strlen(keyword); \ if (strncmp(in, keyword, _kwlen) == 0 && \ - !__wt_isalnum((u_char)in[_kwlen])) { \ - in += _kwlen; \ - result = matchval; \ + !__wt_isalnum((u_char)(in)[_kwlen])) { \ + (in) += _kwlen; \ + (result) = matchval; \ } else { \ - const char *_bad = in; \ - while (__wt_isalnum((u_char)*in)) \ - in++; \ + const char *_bad = (in); \ + while (__wt_isalnum((u_char)*(in))) \ + (in)++; \ WT_RET_MSG(session, EINVAL, \ "unknown keyword \"%.*s\" in JSON", \ - (int)(in - _bad), _bad); \ + (int)((in) - _bad), _bad); \ } \ } while (0) @@ -692,12 +690,13 @@ json_uint_arg(WT_SESSION_IMPL *session, const char **jstr, uint64_t *up) #define JSON_EXPECT_TOKEN_GET(session, jstr, tokval, start, sz) do { \ int __tok; \ - WT_RET(__wt_json_token((WT_SESSION *)session, jstr, &__tok, &start, &sz));\ - if (__tok != tokval) \ + WT_RET(__wt_json_token( \ + (WT_SESSION *)(session), jstr, &__tok, &(start), &(sz))); \ + if (__tok != (tokval)) \ WT_RET_MSG(session, EINVAL, \ "expected JSON %s, got %s", \ __wt_json_tokname(tokval), __wt_json_tokname(__tok)); \ - jstr = start + sz; \ + (jstr) = (start) + (sz); \ } while (0) #define JSON_EXPECT_TOKEN(session, jstr, tokval) do { \ diff --git a/src/cursor/cur_metadata.c b/src/cursor/cur_metadata.c index 10e2fdf28be..fbfc73956e2 100644 --- a/src/cursor/cur_metadata.c +++ b/src/cursor/cur_metadata.c @@ -16,7 +16,7 @@ WT_CURSOR_NEEDKEY(cursor); \ WT_ERR(__wt_buf_set(session, \ &((WT_CURSOR_METADATA *)(cursor))->file_cursor->key, \ - cursor->key.data, cursor->key.size)); \ + (cursor)->key.data, (cursor)->key.size)); \ F_SET(((WT_CURSOR_METADATA *)(cursor))->file_cursor, \ WT_CURSTD_KEY_EXT); \ } while (0) @@ -25,7 +25,7 @@ WT_CURSOR_NEEDVALUE(cursor); \ WT_ERR(__wt_buf_set(session, \ &((WT_CURSOR_METADATA *)(cursor))->file_cursor->value, \ - cursor->value.data, cursor->value.size)); \ + (cursor)->value.data, 
(cursor)->value.size)); \ F_SET(((WT_CURSOR_METADATA *)(cursor))->file_cursor, \ WT_CURSTD_VALUE_EXT); \ } while (0) diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c index 5fde64c74ca..c5ccdb1b649 100644 --- a/src/cursor/cur_stat.c +++ b/src/cursor/cur_stat.c @@ -163,7 +163,6 @@ static void __curstat_set_value(WT_CURSOR *cursor, ...) { WT_UNUSED(cursor); - return; } /* diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c index 72eec177449..ef2c0ac5163 100644 --- a/src/cursor/cur_table.c +++ b/src/cursor/cur_table.c @@ -14,8 +14,8 @@ static int __curtable_update(WT_CURSOR *cursor); #define APPLY_CG(ctable, f) do { \ WT_CURSOR **__cp; \ u_int __i; \ - for (__i = 0, __cp = ctable->cg_cursors; \ - __i < WT_COLGROUPS(ctable->table); \ + for (__i = 0, __cp = (ctable)->cg_cursors; \ + __i < WT_COLGROUPS((ctable)->table); \ __i++, __cp++) \ WT_TRET((*__cp)->f(*__cp)); \ } while (0) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 6863533acfb..84c9990832d 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -934,7 +934,6 @@ __evict_tune_workers(WT_SESSION_IMPL *session) cache = conn->cache; WT_ASSERT(session, conn->evict_threads.threads[0]->session == session); - pgs_evicted_persec_cur = 0; if (conn->evict_tune_stable) return (0); @@ -966,7 +965,8 @@ __evict_tune_workers(WT_SESSION_IMPL *session) pgs_evicted_persec_cur = (delta_pages * WT_THOUSAND) / delta_msec; conn->evict_tune_num_points++; - /* Keep track of the maximum eviction throughput seen and the number + /* + * Keep track of the maximum eviction throughput seen and the number * of workers corresponding to that throughput. 
*/ if (pgs_evicted_persec_cur > conn->evict_tune_pg_sec_max) { @@ -2116,6 +2116,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) WT_TXN_GLOBAL *txn_global; WT_TXN_STATE *txn_state; uint64_t init_evict_count, max_pages_evicted; + bool timer; conn = S2C(session); cache = conn->cache; @@ -2136,7 +2137,9 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) __wt_evict_server_wake(session); /* Track how long application threads spend doing eviction. */ - if (WT_STAT_ENABLED(session) && !F_ISSET(session, WT_SESSION_INTERNAL)) + timer = + WT_STAT_ENABLED(session) && !F_ISSET(session, WT_SESSION_INTERNAL); + if (timer) __wt_epoch(session, &enter); for (init_evict_count = cache->pages_evict;; ret = 0) { @@ -2202,8 +2205,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) } } -err: if (WT_STAT_ENABLED(session) && - !F_ISSET(session, WT_SESSION_INTERNAL)) { +err: if (timer) { __wt_epoch(session, &leave); WT_STAT_CONN_INCRV(session, application_cache_time, WT_TIMEDIFF_US(leave, enter)); diff --git a/src/include/api.h b/src/include/api.h index 1fa777ed5cc..a3636eb8040 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -19,7 +19,7 @@ __wt_verbose((s), WT_VERB_API, "CALL: " #h ":" #n) #define API_CALL(s, h, n, dh, config, cfg) do { \ - const char *cfg[] = \ + const char *(cfg)[] = \ { WT_CONFIG_BASE(s, h##_##n), config, NULL }; \ API_SESSION_INIT(s, h, n, dh); \ WT_ERR(WT_SESSION_CHECK_PANIC(s)); \ @@ -62,15 +62,16 @@ if (__autotxn) { \ if (F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT)) \ F_CLR(&(s)->txn, WT_TXN_AUTOCOMMIT); \ - else if (ret == 0 && !F_ISSET(&(s)->txn, WT_TXN_ERROR)) \ - ret = __wt_txn_commit((s), NULL); \ + else if ((ret) == 0 && \ + !F_ISSET(&(s)->txn, WT_TXN_ERROR)) \ + (ret) = __wt_txn_commit((s), NULL); \ else { \ if (retry) \ WT_TRET(__wt_session_copy_values(s)); \ WT_TRET(__wt_txn_rollback((s), NULL)); \ - if ((ret == 0 || ret == WT_ROLLBACK) && \ + if (((ret) 
== 0 || (ret) == WT_ROLLBACK) && \ (retry)) { \ - ret = 0; \ + (ret) = 0; \ continue; \ } \ WT_TRET(__wt_session_reset_cursors(s, false)); \ diff --git a/src/include/bitstring.i b/src/include/bitstring.i index 08746beb9b9..118dc0bba01 100644 --- a/src/include/bitstring.i +++ b/src/include/bitstring.i @@ -230,7 +230,7 @@ __bit_getv(uint8_t *bitf, uint64_t entry, uint8_t width) #define __BIT_GET(len, mask) \ case len: \ if (__bit_test(bitf, bit)) \ - value |= mask; \ + value |= (mask); \ ++bit \ /* FALLTHROUGH */ diff --git a/src/include/btmem.h b/src/include/btmem.h index 39ca223aebf..f1bb08d2699 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -507,7 +507,7 @@ struct __wt_page { #define WT_INTL_INDEX_GET_SAFE(page) \ ((page)->u.intl.__index) #define WT_INTL_INDEX_GET(session, page, pindex) do { \ - WT_ASSERT(session, session->split_gen != 0); \ + WT_ASSERT(session, (session)->split_gen != 0); \ (pindex) = WT_INTL_INDEX_GET_SAFE(page); \ } while (0) #define WT_INTL_INDEX_SET(page, v) do { \ @@ -868,7 +868,7 @@ struct __wt_col { * Return the 0-based array offset based on a WT_COL reference. */ #define WT_COL_SLOT(page, cip) \ - ((uint32_t)(((WT_COL *)cip) - (page)->pg_var)) + ((uint32_t)(((WT_COL *)(cip)) - (page)->pg_var)) /* * WT_IKEY -- @@ -977,10 +977,10 @@ struct __wt_insert { } key; } u; -#define WT_INSERT_KEY_SIZE(ins) (((WT_INSERT *)ins)->u.key.size) +#define WT_INSERT_KEY_SIZE(ins) (((WT_INSERT *)(ins))->u.key.size) #define WT_INSERT_KEY(ins) \ - ((void *)((uint8_t *)(ins) + ((WT_INSERT *)ins)->u.key.offset)) -#define WT_INSERT_RECNO(ins) (((WT_INSERT *)ins)->u.recno) + ((void *)((uint8_t *)(ins) + ((WT_INSERT *)(ins))->u.key.offset)) +#define WT_INSERT_RECNO(ins) (((WT_INSERT *)(ins))->u.recno) WT_INSERT *next[0]; /* forward-linked skip list */ }; @@ -989,9 +989,9 @@ struct __wt_insert { * Skiplist helper macros. */ #define WT_SKIP_FIRST(ins_head) \ - (((ins_head) == NULL) ? 
NULL : ((WT_INSERT_HEAD *)ins_head)->head[0]) + (((ins_head) == NULL) ? NULL : ((WT_INSERT_HEAD *)(ins_head))->head[0]) #define WT_SKIP_LAST(ins_head) \ - (((ins_head) == NULL) ? NULL : ((WT_INSERT_HEAD *)ins_head)->tail[0]) + (((ins_head) == NULL) ? NULL : ((WT_INSERT_HEAD *)(ins_head))->tail[0]) #define WT_SKIP_NEXT(ins) ((ins)->next[0]) #define WT_SKIP_FOREACH(ins, ins_head) \ for ((ins) = WT_SKIP_FIRST(ins_head); \ @@ -1004,7 +1004,7 @@ struct __wt_insert { #define WT_PAGE_ALLOC_AND_SWAP(s, page, dest, v, count) do { \ if (((v) = (dest)) == NULL) { \ WT_ERR(__wt_calloc_def(s, count, &(v))); \ - if (__wt_atomic_cas_ptr(&dest, NULL, v)) \ + if (__wt_atomic_cas_ptr(&(dest), NULL, v)) \ __wt_cache_page_inmem_incr( \ s, page, (count) * sizeof(*(v))); \ else \ diff --git a/src/include/cell.i b/src/include/cell.i index c130768e595..71c2515daf0 100644 --- a/src/include/cell.i +++ b/src/include/cell.i @@ -361,14 +361,12 @@ __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size) cell->__chunk[0] = (uint8_t) ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT); return (1); - } else { - byte = (uint8_t)size; /* Type + length */ - cell->__chunk[0] = (uint8_t) - ((byte << WT_CELL_SHORT_SHIFT) | - WT_CELL_KEY_SHORT_PFX); - cell->__chunk[1] = prefix; /* Prefix */ - return (2); } + byte = (uint8_t)size; /* Type + length */ + cell->__chunk[0] = (uint8_t) + ((byte << WT_CELL_SHORT_SHIFT) | WT_CELL_KEY_SHORT_PFX); + cell->__chunk[1] = prefix; /* Prefix */ + return (2); } if (prefix == 0) { @@ -569,8 +567,8 @@ __wt_cell_unpack_safe( */ #define WT_CELL_LEN_CHK(t, len) do { \ if (start != NULL && \ - ((uint8_t *)t < (uint8_t *)start || \ - (((uint8_t *)t) + (len)) > (uint8_t *)end)) \ + ((uint8_t *)(t) < (uint8_t *)start || \ + (((uint8_t *)(t)) + (len)) > (uint8_t *)end)) \ return (WT_ERROR); \ } while (0) diff --git a/src/include/column.i b/src/include/column.i index c1b45a1f4e0..07b627315e6 100644 --- a/src/include/column.i +++ b/src/include/column.i @@ -108,7 +108,7 @@ 
__col_insert_search_match(WT_INSERT_HEAD *ins_head, uint64_t recno) /* Fast path the check for values at the end of the skiplist. */ if (recno > WT_INSERT_RECNO(ret_ins)) return (NULL); - else if (recno == WT_INSERT_RECNO(ret_ins)) + if (recno == WT_INSERT_RECNO(ret_ins)) return (ret_ins); /* @@ -127,7 +127,7 @@ __col_insert_search_match(WT_INSERT_HEAD *ins_head, uint64_t recno) if (cmp == 0) /* Exact match: return */ return (*insp); - else if (cmp > 0) /* Keep going at this level */ + if (cmp > 0) /* Keep going at this level */ insp = &(*insp)->next[i]; else { /* Drop down a level */ --i; diff --git a/src/include/connection.h b/src/include/connection.h index ce483d3291a..6c23492e926 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -127,7 +127,7 @@ struct __wt_named_extractor { F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \ TAILQ_INSERT_HEAD(&(conn)->dhqh, dhandle, q); \ TAILQ_INSERT_HEAD(&(conn)->dhhash[bucket], dhandle, hashq); \ - ++conn->dhandle_count; \ + ++(conn)->dhandle_count; \ } while (0) #define WT_CONN_DHANDLE_REMOVE(conn, dhandle, bucket) do { \ @@ -135,7 +135,7 @@ struct __wt_named_extractor { F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); \ TAILQ_REMOVE(&(conn)->dhqh, dhandle, q); \ TAILQ_REMOVE(&(conn)->dhhash[bucket], dhandle, hashq); \ - --conn->dhandle_count; \ + --(conn)->dhandle_count; \ } while (0) /* diff --git a/src/include/cursor.h b/src/include/cursor.h index 31c8963a486..f32b4250d30 100644 --- a/src/include/cursor.h +++ b/src/include/cursor.h @@ -73,7 +73,7 @@ struct __wt_cursor_backup { #define WT_CURBACKUP_LOCKER 0x01 /* Hot-backup started */ uint8_t flags; }; -#define WT_CURSOR_BACKUP_ID(cursor) (((WT_CURSOR_BACKUP *)cursor)->maxid) +#define WT_CURSOR_BACKUP_ID(cursor) (((WT_CURSOR_BACKUP *)(cursor))->maxid) struct __wt_cursor_btree { WT_CURSOR iface; @@ -474,7 +474,7 @@ struct __wt_cursor_stat { * Return a reference to a statistic cursor's stats structures. 
*/ #define WT_CURSOR_STATS(cursor) \ - (((WT_CURSOR_STAT *)cursor)->stats) + (((WT_CURSOR_STAT *)(cursor))->stats) struct __wt_cursor_table { WT_CURSOR iface; @@ -493,7 +493,7 @@ struct __wt_cursor_table { }; #define WT_CURSOR_PRIMARY(cursor) \ - (((WT_CURSOR_TABLE *)cursor)->cg_cursors[0]) + (((WT_CURSOR_TABLE *)(cursor))->cg_cursors[0]) #define WT_CURSOR_RECNO(cursor) WT_STREQ((cursor)->key_format, "r") @@ -550,4 +550,4 @@ struct __wt_cursor_table { } while (0) #define WT_CURSOR_RAW_OK \ - WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_PRINT | WT_CURSTD_RAW + (WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_PRINT | WT_CURSTD_RAW) diff --git a/src/include/dhandle.h b/src/include/dhandle.h index 4f318e7bccf..8861e96112b 100644 --- a/src/include/dhandle.h +++ b/src/include/dhandle.h @@ -38,20 +38,20 @@ (((WT_CURSOR_BTREE *)((s)->meta_cursor))->btree->dhandle) #define WT_DHANDLE_ACQUIRE(dhandle) \ - (void)__wt_atomic_add32(&dhandle->session_ref, 1) + (void)__wt_atomic_add32(&(dhandle)->session_ref, 1) #define WT_DHANDLE_RELEASE(dhandle) \ - (void)__wt_atomic_sub32(&dhandle->session_ref, 1) + (void)__wt_atomic_sub32(&(dhandle)->session_ref, 1) #define WT_DHANDLE_NEXT(session, dhandle, head, field) do { \ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST));\ - if (dhandle == NULL) \ - dhandle = TAILQ_FIRST(head); \ + if ((dhandle) == NULL) \ + (dhandle) = TAILQ_FIRST(head); \ else { \ WT_DHANDLE_RELEASE(dhandle); \ - dhandle = TAILQ_NEXT(dhandle, field); \ + (dhandle) = TAILQ_NEXT(dhandle, field); \ } \ - if (dhandle != NULL) \ + if ((dhandle) != NULL) \ WT_DHANDLE_ACQUIRE(dhandle); \ } while (0) diff --git a/src/include/intpack.i b/src/include/intpack.i index e8bea58cede..a534de9d9a8 100644 --- a/src/include/intpack.i +++ b/src/include/intpack.i @@ -59,21 +59,21 @@ /* Count the leading zero bytes. */ #if defined(__GNUC__) #define WT_LEADING_ZEROS(x, i) \ - (i = (x == 0) ? (int)sizeof(x) : __builtin_clzll(x) >> 3) + ((i) = ((x) == 0) ? 
(int)sizeof(x) : __builtin_clzll(x) >> 3) #elif defined(_MSC_VER) #define WT_LEADING_ZEROS(x, i) do { \ - if (x == 0) i = (int)sizeof(x); \ + if ((x) == 0) (i) = (int)sizeof(x); \ else { \ unsigned long __index; \ _BitScanReverse64(&__index, x); \ __index = 63 ^ __index; \ - i = (int)(__index >> 3); } \ + (i) = (int)(__index >> 3); } \ } while (0) #else #define WT_LEADING_ZEROS(x, i) do { \ uint64_t __x = (x); \ uint64_t __m = (uint64_t)0xff << 56; \ - for (i = 0; !(__x & __m) && i != 8; i++) \ + for ((i) = 0; !(__x & __m) && (i) != 8; (i)++) \ __m >>= 8; \ } while (0) #endif @@ -231,7 +231,8 @@ __wt_vpack_int(uint8_t **pp, size_t maxlen, int64_t x) if (x < NEG_2BYTE_MIN) { *p = NEG_MULTI_MARKER; return (__wt_vpack_negint(pp, maxlen, (uint64_t)x)); - } else if (x < NEG_1BYTE_MIN) { + } + if (x < NEG_1BYTE_MIN) { WT_SIZE_CHECK_PACK(2, maxlen); x -= NEG_2BYTE_MIN; *p++ = NEG_2BYTE_MARKER | GET_BITS(x, 13, 8); @@ -358,12 +359,10 @@ __wt_vsize_uint(uint64_t x) { if (x <= POS_1BYTE_MAX) return (1); - else if (x <= POS_2BYTE_MAX + 1) { + if (x <= POS_2BYTE_MAX + 1) return (2); - } else { - x -= POS_2BYTE_MAX + 1; - return (__wt_vsize_posint(x)); - } + x -= POS_2BYTE_MAX + 1; + return (__wt_vsize_posint(x)); } /* @@ -373,13 +372,12 @@ __wt_vsize_uint(uint64_t x) static inline size_t __wt_vsize_int(int64_t x) { - if (x < NEG_2BYTE_MIN) { + if (x < NEG_2BYTE_MIN) return (__wt_vsize_negint((uint64_t)x)); - } else if (x < NEG_1BYTE_MIN) { + if (x < NEG_1BYTE_MIN) return (2); - } else if (x < 0) { + if (x < 0) return (1); - } else - /* For non-negative values, use the unsigned code above. */ - return (__wt_vsize_uint((uint64_t)x)); + /* For non-negative values, use the unsigned code above. 
*/ + return (__wt_vsize_uint((uint64_t)x)); } diff --git a/src/include/lint.h b/src/include/lint.h index e20a83144ee..2d0f47988b7 100644 --- a/src/include/lint.h +++ b/src/include/lint.h @@ -29,9 +29,9 @@ __wt_atomic_fetch_add##name(type *vp, type v) \ { \ type orig; \ \ - old = *vp; \ + orig = *vp; \ *vp += v; \ - return (old); \ + return (orig); \ } \ static inline ret \ __wt_atomic_store##name(type *vp, type v) \ @@ -40,7 +40,7 @@ __wt_atomic_store##name(type *vp, type v) \ \ orig = *vp; \ *vp = v; \ - return (old); \ + return (orig); \ } \ static inline ret \ __wt_atomic_sub##name(type *vp, type v) \ @@ -49,9 +49,9 @@ __wt_atomic_sub##name(type *vp, type v) \ return (*vp); \ } \ static inline bool \ -__wt_atomic_cas##name(type *vp, type old, type new) \ +__wt_atomic_cas##name(type *vp, type orig, type new) \ { \ - if (*vp == old) { \ + if (*vp == orig) { \ *vp = new; \ return (true); \ } \ @@ -75,8 +75,8 @@ WT_ATOMIC_FUNC(size, size_t, size_t) * Pointer compare and swap. */ static inline bool -__wt_atomic_cas_ptr(void *vp, void *old, void *new) { - if (*(void **)vp == old) { +__wt_atomic_cas_ptr(void *vp, void *orig, void *new) { + if (*(void **)vp == orig) { *(void **)vp = new; return (true); } diff --git a/src/include/log.h b/src/include/log.h index a6be3582b4d..f0999ba316b 100644 --- a/src/include/log.h +++ b/src/include/log.h @@ -86,8 +86,8 @@ union __wt_lsn { * The high bit is reserved for the special states. If the high bit is * set (WT_LOG_SLOT_RESERVED) then we are guaranteed to be in a special state. 
*/ -#define WT_LOG_SLOT_FREE -1 /* Not in use */ -#define WT_LOG_SLOT_WRITTEN -2 /* Slot data written, not processed */ +#define WT_LOG_SLOT_FREE (-1) /* Not in use */ +#define WT_LOG_SLOT_WRITTEN (-2) /* Slot data written, not processed */ /* * We allocate the buffer size, but trigger a slot switch when we cross @@ -144,8 +144,8 @@ union __wt_lsn { /* Slot is in use, but closed to new joins */ #define WT_LOG_SLOT_CLOSED(state) \ (WT_LOG_SLOT_ACTIVE(state) && \ - (FLD64_ISSET((uint64_t)state, WT_LOG_SLOT_CLOSE) && \ - !FLD64_ISSET((uint64_t)state, WT_LOG_SLOT_RESERVED))) + (FLD64_ISSET((uint64_t)(state), WT_LOG_SLOT_CLOSE) && \ + !FLD64_ISSET((uint64_t)(state), WT_LOG_SLOT_RESERVED))) /* Slot is in use, all data copied into buffer */ #define WT_LOG_SLOT_INPROGRESS(state) \ (WT_LOG_SLOT_RELEASED(state) != WT_LOG_SLOT_JOINED(state)) @@ -185,7 +185,7 @@ struct __wt_logslot { #define WT_WITH_SLOT_LOCK(session, log, op) do { \ WT_ASSERT(session, !F_ISSET(session, WT_SESSION_LOCKED_SLOT)); \ WT_WITH_LOCK_WAIT(session, \ - &log->log_slot_lock, WT_SESSION_LOCKED_SLOT, op); \ + &(log)->log_slot_lock, WT_SESSION_LOCKED_SLOT, op); \ } while (0) struct __wt_myslot { diff --git a/src/include/misc.h b/src/include/misc.h index 7aba397e173..9161a215fdc 100644 --- a/src/include/misc.h +++ b/src/include/misc.h @@ -63,7 +63,7 @@ #define WT_MAX(a, b) ((a) < (b) ? (b) : (a)) /* Elements in an array. */ -#define WT_ELEMENTS(a) (sizeof(a) / sizeof(a[0])) +#define WT_ELEMENTS(a) (sizeof(a) / sizeof((a)[0])) /* 10 level skip lists, 1/4 have a link to the next element. 
*/ #define WT_SKIP_MAXDEPTH 10 @@ -181,14 +181,14 @@ */ #define WT_BINARY_SEARCH(key, arrayp, n, found) do { \ uint32_t __base, __indx, __limit; \ - found = false; \ + (found) = false; \ for (__base = 0, __limit = (n); __limit != 0; __limit >>= 1) { \ __indx = __base + (__limit >> 1); \ - if ((arrayp)[__indx] < key) { \ + if ((arrayp)[__indx] < (key)) { \ __base = __indx + 1; \ --__limit; \ - } else if ((arrayp)[__indx] == key) { \ - found = true; \ + } else if ((arrayp)[__indx] == (key)) { \ + (found) = true; \ break; \ } \ } \ @@ -207,8 +207,8 @@ /* Check if a string matches a prefix. */ #define WT_PREFIX_MATCH(str, pfx) \ - (((const char *)(str))[0] == ((const char *)pfx)[0] && \ - strncmp((str), (pfx), strlen(pfx)) == 0) + (((const char *)(str))[0] == ((const char *)(pfx))[0] && \ + strncmp(str, pfx, strlen(pfx)) == 0) /* Check if a string matches a prefix, and move past it. */ #define WT_PREFIX_SKIP(str, pfx) \ @@ -225,8 +225,8 @@ /* Check if a string matches a byte string of len bytes. 
*/ #define WT_STRING_MATCH(str, bytes, len) \ - (((const char *)str)[0] == ((const char *)bytes)[0] && \ - strncmp(str, bytes, len) == 0 && (str)[(len)] == '\0') + (((const char *)(str))[0] == ((const char *)(bytes))[0] && \ + strncmp(str, bytes, len) == 0 && (str)[len] == '\0') /* * Macro that produces a string literal that isn't wrapped in quotes, to avoid diff --git a/src/include/mutex.i b/src/include/mutex.i index 6b83cb280d3..640706284c3 100644 --- a/src/include/mutex.i +++ b/src/include/mutex.i @@ -316,6 +316,6 @@ __wt_spin_trylock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t) stats = (int64_t **)S2C(session)->stats; stats[session->stat_bucket][t->stat_count_off]++; return (0); - } else - return (__wt_spin_trylock(session, t)); + } + return (__wt_spin_trylock(session, t)); } diff --git a/src/include/os.h b/src/include/os.h index 8505649a1fd..73d89268392 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -67,7 +67,7 @@ #define WT_TIMECMP(t1, t2) \ ((t1).tv_sec < (t2).tv_sec ? -1 : \ - (t1).tv_sec == (t2.tv_sec) ? \ + (t1).tv_sec == (t2).tv_sec ? \ (t1).tv_nsec < (t2).tv_nsec ? -1 : \ (t1).tv_nsec == (t2).tv_nsec ? 
0 : 1 : 1) diff --git a/src/include/packing.i b/src/include/packing.i index 8ba3dd536ac..d79afe6d4a2 100644 --- a/src/include/packing.i +++ b/src/include/packing.i @@ -206,43 +206,43 @@ next: if (pack->cur == pack->end) #define WT_PACK_GET(session, pv, ap) do { \ WT_ITEM *__item; \ - switch (pv.type) { \ + switch ((pv).type) { \ case 'x': \ break; \ case 's': \ case 'S': \ - pv.u.s = va_arg(ap, const char *); \ + (pv).u.s = va_arg(ap, const char *); \ break; \ case 'U': \ case 'u': \ __item = va_arg(ap, WT_ITEM *); \ - pv.u.item.data = __item->data; \ - pv.u.item.size = __item->size; \ + (pv).u.item.data = __item->data; \ + (pv).u.item.size = __item->size; \ break; \ case 'b': \ case 'h': \ case 'i': \ - pv.u.i = va_arg(ap, int); \ + (pv).u.i = va_arg(ap, int); \ break; \ case 'B': \ case 'H': \ case 'I': \ case 't': \ - pv.u.u = va_arg(ap, unsigned int); \ + (pv).u.u = va_arg(ap, unsigned int); \ break; \ case 'l': \ - pv.u.i = va_arg(ap, long); \ + (pv).u.i = va_arg(ap, long); \ break; \ case 'L': \ - pv.u.u = va_arg(ap, unsigned long); \ + (pv).u.u = va_arg(ap, unsigned long); \ break; \ case 'q': \ - pv.u.i = va_arg(ap, int64_t); \ + (pv).u.i = va_arg(ap, int64_t); \ break; \ case 'Q': \ case 'r': \ case 'R': \ - pv.u.u = va_arg(ap, uint64_t); \ + (pv).u.u = va_arg(ap, uint64_t); \ break; \ /* User format strings have already been validated. 
*/ \ WT_ILLEGAL_VALUE(session); \ @@ -556,47 +556,47 @@ __unpack_read(WT_SESSION_IMPL *session, #define WT_UNPACK_PUT(session, pv, ap) do { \ WT_ITEM *__item; \ - switch (pv.type) { \ + switch ((pv).type) { \ case 'x': \ break; \ case 's': \ case 'S': \ - *va_arg(ap, const char **) = pv.u.s; \ + *va_arg(ap, const char **) = (pv).u.s; \ break; \ case 'U': \ case 'u': \ __item = va_arg(ap, WT_ITEM *); \ - __item->data = pv.u.item.data; \ - __item->size = pv.u.item.size; \ + __item->data = (pv).u.item.data; \ + __item->size = (pv).u.item.size; \ break; \ case 'b': \ - *va_arg(ap, int8_t *) = (int8_t)pv.u.i; \ + *va_arg(ap, int8_t *) = (int8_t)(pv).u.i; \ break; \ case 'h': \ - *va_arg(ap, int16_t *) = (short)pv.u.i; \ + *va_arg(ap, int16_t *) = (short)(pv).u.i; \ break; \ case 'i': \ case 'l': \ - *va_arg(ap, int32_t *) = (int32_t)pv.u.i; \ + *va_arg(ap, int32_t *) = (int32_t)(pv).u.i; \ break; \ case 'q': \ - *va_arg(ap, int64_t *) = pv.u.i; \ + *va_arg(ap, int64_t *) = (pv).u.i; \ break; \ case 'B': \ case 't': \ - *va_arg(ap, uint8_t *) = (uint8_t)pv.u.u; \ + *va_arg(ap, uint8_t *) = (uint8_t)(pv).u.u; \ break; \ case 'H': \ - *va_arg(ap, uint16_t *) = (uint16_t)pv.u.u; \ + *va_arg(ap, uint16_t *) = (uint16_t)(pv).u.u; \ break; \ case 'I': \ case 'L': \ - *va_arg(ap, uint32_t *) = (uint32_t)pv.u.u; \ + *va_arg(ap, uint32_t *) = (uint32_t)(pv).u.u; \ break; \ case 'Q': \ case 'r': \ case 'R': \ - *va_arg(ap, uint64_t *) = pv.u.u; \ + *va_arg(ap, uint64_t *) = (pv).u.u; \ break; \ /* User format strings have already been validated. */ \ WT_ILLEGAL_VALUE(session); \ diff --git a/src/include/schema.h b/src/include/schema.h index 9a6e1e54e80..50e141d9921 100644 --- a/src/include/schema.h +++ b/src/include/schema.h @@ -107,10 +107,11 @@ struct __wt_table { * Acquire a lock if available, perform an operation, drop the lock. 
*/ #define WT_WITH_LOCK_NOWAIT(session, ret, lock, flag, op) do { \ - ret = 0; \ + (ret) = 0; \ if (F_ISSET(session, (flag))) { \ op; \ - } else if ((ret = __wt_spin_trylock_track(session, lock)) == 0) {\ + } else if (((ret) = \ + __wt_spin_trylock_track(session, lock)) == 0) { \ F_SET(session, (flag)); \ op; \ F_CLR(session, (flag)); \ @@ -248,7 +249,7 @@ struct __wt_table { WT_SESSION_LOCKED_HANDLE_LIST)); \ if (F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)) { \ op; \ - } else if ((ret = __wt_try_writelock(session, \ + } else if (((ret) = __wt_try_writelock(session, \ &S2C(session)->table_lock)) == 0) { \ F_SET(session, WT_SESSION_LOCKED_TABLE_WRITE); \ op; \ diff --git a/src/include/session.h b/src/include/session.h index dec97cff5d3..674e92671b1 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -87,7 +87,7 @@ struct __wt_session_impl { void *meta_track_sub; /* Child transaction / save point */ size_t meta_track_alloc; /* Currently allocated */ int meta_track_nest; /* Nesting level of meta transaction */ -#define WT_META_TRACKING(session) (session->meta_track_next != NULL) +#define WT_META_TRACKING(session) ((session)->meta_track_next != NULL) /* * Each session keeps a cache of table handles. The set of handles diff --git a/src/include/stat.h b/src/include/stat.h index 8b2e78a4ed5..ed3d588b7d3 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -72,7 +72,7 @@ * and the session ID is a small, monotonically increasing number. */ #define WT_STATS_SLOT_ID(session) \ - ((session)->id) % WT_COUNTER_SLOTS + (((session)->id) % WT_COUNTER_SLOTS) /* * Statistic structures are arrays of int64_t's. 
We have functions to read/write diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 5dd9a720e31..7223aeae0f6 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -36,7 +36,7 @@ extern "C" { #if defined(DOXYGEN) || defined(SWIG) #define __F(func) func #else -#define __F(func) (*func) +#define __F(func) (*(func)) #endif #ifdef SWIG @@ -3073,27 +3073,27 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp); * transaction is in progress, it should be rolled back and the operation * retried in a new transaction. */ -#define WT_ROLLBACK -31800 +#define WT_ROLLBACK (-31800) /*! * Attempt to insert an existing key. * This error is generated when the application attempts to insert a record with * the same key as an existing record without the 'overwrite' configuration to * WT_SESSION::open_cursor. */ -#define WT_DUPLICATE_KEY -31801 +#define WT_DUPLICATE_KEY (-31801) /*! * Non-specific WiredTiger error. * This error is returned when an error is not covered by a specific error * return. */ -#define WT_ERROR -31802 +#define WT_ERROR (-31802) /*! * Item not found. * This error indicates an operation did not find a value to return. This * includes cursor search and other operations where no record matched the * cursor's search key such as WT_CURSOR::update or WT_CURSOR::remove. */ -#define WT_NOTFOUND -31803 +#define WT_NOTFOUND (-31803) /*! * WiredTiger library panic. * This error indicates an underlying problem that requires the application exit @@ -3101,17 +3101,17 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp); * returned from a WiredTiger interface, no further WiredTiger calls are * required. */ -#define WT_PANIC -31804 +#define WT_PANIC (-31804) /*! @cond internal */ /*! Restart the operation (internal). */ -#define WT_RESTART -31805 +#define WT_RESTART (-31805) /*! @endcond */ /*! * Recovery must be run to continue. 
* This error is generated when wiredtiger_open is configured to return an error * if recovery is required to use the database. */ -#define WT_RUN_RECOVERY -31806 +#define WT_RUN_RECOVERY (-31806) /*! * Operation would overflow cache. * This error is only generated when wiredtiger_open is configured to run in- @@ -3120,7 +3120,7 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp); * progress, it should be rolled back and the operation retried in a new * transaction. */ -#define WT_CACHE_FULL -31807 +#define WT_CACHE_FULL (-31807) /* * Error return section: END * DO NOT EDIT: automatically built by dist/api_err.py. diff --git a/src/log/log.c b/src/log/log.c index 3477ca52502..05234619d32 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -309,14 +309,11 @@ void __wt_log_written_reset(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; - WT_LOG *log; conn = S2C(session); - if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) - return; - log = conn->log; - log->log_written = 0; - return; + + if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) + conn->log->log_written = 0; } /* @@ -1775,9 +1772,8 @@ advance: if (eol) /* Found a hole. This LSN is the end. */ break; - else - /* Last record in log. Look for more. */ - goto advance; + /* Last record in log. Look for more. 
*/ + goto advance; } rdup_len = __wt_rduppo2(reclen, allocsize); if (reclen > allocsize) { diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index bd1daaa6915..2a34240de46 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -10,10 +10,10 @@ #define WT_FORALL_CURSORS(clsm, c, i) \ for ((i) = (clsm)->nchunks; (i) > 0;) \ - if (((c) = (clsm)->chunks[--i]->cursor) != NULL) + if (((c) = (clsm)->chunks[--(i)]->cursor) != NULL) #define WT_LSM_CURCMP(s, lsm_tree, c1, c2, cmp) \ - __wt_compare(s, (lsm_tree)->collator, &(c1)->key, &(c2)->key, &cmp) + __wt_compare(s, (lsm_tree)->collator, &(c1)->key, &(c2)->key, &(cmp)) static int __clsm_lookup(WT_CURSOR_LSM *, WT_ITEM *); static int __clsm_open_cursors(WT_CURSOR_LSM *, bool, u_int, uint32_t); @@ -1223,7 +1223,8 @@ __clsm_lookup(WT_CURSOR_LSM *clsm, WT_ITEM *value) WT_LSM_TREE_STAT_INCR( session, clsm->lsm_tree->bloom_miss); continue; - } else if (ret == 0) + } + if (ret == 0) WT_LSM_TREE_STAT_INCR( session, clsm->lsm_tree->bloom_hit); WT_ERR(ret); @@ -1328,7 +1329,8 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp) if ((ret = c->search_near(c, &cmp)) == WT_NOTFOUND) { ret = 0; continue; - } else if (ret != 0) + } + if (ret != 0) goto err; /* Do we have an exact match? 
*/ @@ -1348,7 +1350,8 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp) if ((ret = c->next(c)) == WT_NOTFOUND) { ret = 0; continue; - } else if (ret != 0) + } + if (ret != 0) goto err; } diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index ceb5f03a2f5..a06b736bf0a 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -187,7 +187,7 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, continue; if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM) || chunk->generation > 0) break; - else if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) && + if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) && F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) break; } diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index 0b0801a8cca..358c43eab96 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -230,7 +230,7 @@ __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (__wt_atomic_cas32(&chunk->bloom_busy, 0, 1)) { if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) { ret = __lsm_bloom_create( - session, lsm_tree, chunk, (u_int)i); + session, lsm_tree, chunk, i); /* * Record if we were successful so that we can * later push a merge work unit. 
@@ -662,7 +662,8 @@ __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (drop_ret == EBUSY) { ++skipped; continue; - } else if (drop_ret != ENOENT) + } + if (drop_ret != ENOENT) WT_ERR(drop_ret); flush_metadata = true; @@ -673,7 +674,8 @@ __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (drop_ret == EBUSY) { ++skipped; continue; - } else if (drop_ret != ENOENT) + } + if (drop_ret != ENOENT) WT_ERR(drop_ret); flush_metadata = true; } diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 88d4397fcb5..23f654caa70 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -1395,7 +1395,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, */ #define WT_CHILD_RELEASE(session, hazard, ref) do { \ if (hazard) { \ - hazard = false; \ + (hazard) = false; \ WT_TRET( \ __wt_page_release(session, ref, WT_READ_NO_EVICT)); \ } \ @@ -1737,7 +1737,7 @@ __rec_copy_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_KV *kv) * WT_CELLs are typically small, 1 or 2 bytes -- don't call memcpy, do * the copy in-line. */ - for (p = (uint8_t *)r->first_free, + for (p = r->first_free, t = (uint8_t *)&kv->cell, len = kv->cell_len; len > 0; --len) *p++ = *t++; @@ -2889,7 +2889,7 @@ no_slots: len = WT_PTRDIFF( r->first_free, (uint8_t *)dsk + dsk_dst->mem_size); dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); - (void)memmove(dsk_start, (uint8_t *)r->first_free - len, len); + (void)memmove(dsk_start, r->first_free - len, len); r->entries -= r->raw_entries[result_slots - 1]; r->first_free = dsk_start + len; @@ -3605,16 +3605,7 @@ __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) r = cbulk->reconcile; r->is_bulk_load = true; - recno = WT_RECNO_OOB; /* -Werror=maybe-uninitialized */ - switch (btree->type) { - case BTREE_COL_FIX: - case BTREE_COL_VAR: - recno = 1; - break; - case BTREE_ROW: - recno = WT_RECNO_OOB; - break; - } + recno = btree->type == BTREE_ROW ? 
WT_RECNO_OOB : 1; return (__rec_split_init( session, r, cbulk->leaf, recno, btree->maxleafpage)); diff --git a/src/support/crypto.c b/src/support/crypto.c index ab94ec2c829..cce0d228832 100644 --- a/src/support/crypto.c +++ b/src/support/crypto.c @@ -133,5 +133,4 @@ __wt_encrypt_size(WT_SESSION_IMPL *session, return; *sizep = incoming_size + kencryptor->size_const + WT_ENCRYPT_LEN_SIZE; - return; } diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 80cdf1cd39b..6c97922f7e1 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -422,7 +422,6 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session) __wt_sleep(0, stepdown_us / 10); __wt_epoch(session, &stop); current_us = WT_TIMEDIFF_US(stop, last); - total_ms = WT_TIMEDIFF_MS(stop, start); bytes_written_total = cache->bytes_written - bytes_written_start; diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c index 2d8a77a69e6..30932195b1e 100644 --- a/src/txn/txn_recover.c +++ b/src/txn/txn_recover.c @@ -93,7 +93,7 @@ __recovery_cursor(WT_SESSION_IMPL *session, WT_RECOVERY *r, "%s op %" PRIu32 " to file %" PRIu32 " at LSN %" PRIu32 \ "/%" PRIu32, \ cursor == NULL ? "Skipping" : "Applying", \ - optype, fileid, lsnp->l.file, lsnp->l.offset); \ + optype, fileid, (lsnp)->l.file, (lsnp)->l.offset); \ if (cursor == NULL) \ break diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c index cded40a8b45..947fa7bf9ef 100644 --- a/src/utilities/util_dump.c +++ b/src/utilities/util_dump.c @@ -504,17 +504,18 @@ dump_prefix(WT_SESSION *session, bool hex, bool json) (void)wiredtiger_version(&vmajor, &vminor, &vpatch); + if (json && printf( + " \"%s\" : \"%d (%d.%d.%d)\",\n", + DUMP_JSON_VERSION_MARKER, DUMP_JSON_CURRENT_VERSION, + vmajor, vminor, vpatch) < 0) + return (util_err(session, EIO, NULL)); + if (!json && (printf( "WiredTiger Dump (WiredTiger Version %d.%d.%d)\n", vmajor, vminor, vpatch) < 0 || printf("Format=%s\n", hex ? 
"hex" : "print") < 0 || printf("Header\n") < 0)) return (util_err(session, EIO, NULL)); - else if (json && printf( - " \"%s\" : \"%d (%d.%d.%d)\",\n", - DUMP_JSON_VERSION_MARKER, DUMP_JSON_CURRENT_VERSION, - vmajor, vminor, vpatch) < 0) - return (util_err(session, EIO, NULL)); return (0); } diff --git a/src/utilities/util_load.c b/src/utilities/util_load.c index ca77643eb49..d31fa4c9d08 100644 --- a/src/utilities/util_load.c +++ b/src/utilities/util_load.c @@ -80,8 +80,8 @@ util_load(WT_SESSION *session, int argc, char *argv[]) if (no_overwrite) flags |= LOAD_JSON_NO_OVERWRITE; return (util_load_json(session, filename, flags)); - } else - return (load_dump(session)); + } + return (load_dump(session)); } /* diff --git a/src/utilities/util_main.c b/src/utilities/util_main.c index 7157f0d90fe..68e3b0f1bc5 100644 --- a/src/utilities/util_main.c +++ b/src/utilities/util_main.c @@ -20,7 +20,43 @@ static const char *command; /* Command name */ #define REC_LOGOFF "log=(enabled=false)" #define REC_RECOVER "log=(recover=on)" -static int usage(void); +static void +usage(void) +{ + fprintf(stderr, + "WiredTiger Data Engine (version %d.%d)\n", + WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR); + fprintf(stderr, + "global options:\n" + "\t" "-C\t" "wiredtiger_open configuration\n" + "\t" "-h\t" "database directory\n" + "\t" "-L\t" "turn logging off for debug-mode\n" + "\t" "-R\t" "run recovery if configured\n" + "\t" "-V\t" "display library version and exit\n" + "\t" "-v\t" "verbose\n"); + fprintf(stderr, + "commands:\n" + "\t" "alter\t alter an object\n" + "\t" "backup\t database backup\n" + "\t" "compact\t compact an object\n" + "\t" "copyright copyright information\n" + "\t" "create\t create an object\n" + "\t" "drop\t drop an object\n" + "\t" "dump\t dump an object\n" + "\t" "list\t list database objects\n" + "\t" "load\t load an object\n" + "\t" "loadtext load an object from a text file\n" + "\t" "printlog display the database log\n" + "\t" "read\t read values from an 
object\n" + "\t" "rebalance rebalance an object\n" + "\t" "rename\t rename an object\n" + "\t" "salvage\t salvage a file\n" + "\t" "stat\t display statistics for an object\n" + "\t" "truncate truncate an object, removing all content\n" + "\t" "upgrade\t upgrade an object\n" + "\t" "verify\t verify an object\n" + "\t" "write\t write values to an object\n"); +} int main(int argc, char *argv[]) @@ -73,8 +109,9 @@ main(int argc, char *argv[]) cmd_config = __wt_optarg; break; case 'E': /* secret key */ + free(secretkey); /* lint: set more than once */ if ((secretkey = strdup(__wt_optarg)) == NULL) { - ret = util_err(NULL, errno, NULL); + (void)util_err(NULL, errno, NULL); goto err; } memset(__wt_optarg, 0, strlen(__wt_optarg)); @@ -92,24 +129,27 @@ main(int argc, char *argv[]) break; case 'V': /* version */ printf("%s\n", wiredtiger_version(NULL, NULL, NULL)); - return (EXIT_SUCCESS); + goto done; case 'v': /* verbose */ verbose = true; break; case '?': default: - return (usage()); + usage(); + goto err; } if (logoff && recover) { fprintf(stderr, "Only one of -L and -R is allowed.\n"); - return (EXIT_FAILURE); + goto err; } argc -= __wt_optind; argv += __wt_optind; /* The next argument is the command name. */ - if (argc < 1) - return (usage()); + if (argc < 1) { + usage(); + goto err; + } command = argv[0]; /* Reset getopt. */ @@ -130,7 +170,7 @@ main(int argc, char *argv[]) func = util_compact; else if (strcmp(command, "copyright") == 0) { util_copyright(); - return (EXIT_SUCCESS); + goto done; } else if (strcmp(command, "create") == 0) { func = util_create; config = "create"; @@ -194,8 +234,10 @@ main(int argc, char *argv[]) default: break; } - if (func == NULL) - return (usage()); + if (func == NULL) { + usage(); + goto err; + } /* Build the configuration string. 
*/ len = 10; /* some slop */ @@ -212,7 +254,7 @@ main(int argc, char *argv[]) } len += strlen(rec_config); if ((p = malloc(len)) == NULL) { - ret = util_err(NULL, errno, NULL); + (void)util_err(NULL, errno, NULL); goto err; } (void)snprintf(p, len, "%s,%s,%s%s%s%s", @@ -223,19 +265,24 @@ main(int argc, char *argv[]) /* Open the database and a session. */ if ((ret = wiredtiger_open(home, verbose ? verbose_handler : NULL, config, &conn)) != 0) { - ret = util_err(NULL, ret, NULL); + (void)util_err(NULL, ret, NULL); goto err; } if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) { - ret = util_err(NULL, ret, NULL); + (void)util_err(NULL, ret, NULL); goto err; } /* Call the function. */ ret = func(session, argc, argv); + if (0) { +err: ret = 1; + } +done: + /* Close the database. */ -err: if (conn != NULL && (tret = conn->close(conn, NULL)) != 0 && ret == 0) + if (conn != NULL && (tret = conn->close(conn, NULL)) != 0 && ret == 0) ret = tret; free(p); @@ -244,46 +291,6 @@ err: if (conn != NULL && (tret = conn->close(conn, NULL)) != 0 && ret == 0) return (ret == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE); } -static int -usage(void) -{ - fprintf(stderr, - "WiredTiger Data Engine (version %d.%d)\n", - WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR); - fprintf(stderr, - "global options:\n" - "\t" "-C\t" "wiredtiger_open configuration\n" - "\t" "-h\t" "database directory\n" - "\t" "-L\t" "turn logging off for debug-mode\n" - "\t" "-R\t" "run recovery if configured\n" - "\t" "-V\t" "display library version and exit\n" - "\t" "-v\t" "verbose\n"); - fprintf(stderr, - "commands:\n" - "\t" "alter\t alter an object\n" - "\t" "backup\t database backup\n" - "\t" "compact\t compact an object\n" - "\t" "copyright copyright information\n" - "\t" "create\t create an object\n" - "\t" "drop\t drop an object\n" - "\t" "dump\t dump an object\n" - "\t" "list\t list database objects\n" - "\t" "load\t load an object\n" - "\t" "loadtext load an object from a text file\n" - "\t" "printlog display the database log\n" - "\t" "read\t read values from an object\n" - "\t" "rebalance rebalance an object\n" - "\t" "rename\t rename an object\n" - "\t" "salvage\t salvage a file\n" - "\t" "stat\t display statistics for an object\n" - "\t" "truncate truncate an object, removing all content\n" - "\t" "upgrade\t upgrade an object\n" - "\t" "verify\t verify an object\n" - "\t" "write\t write values to an object\n"); - - return (EXIT_FAILURE); -} - /* * util_uri -- * Build a name. @@ -314,7 +321,7 @@ util_uri(WT_SESSION *session, const char *s, const char *type) * the default type for the operation. */ if (strchr(s, ':') != NULL) - strcpy(name, s); + snprintf(name, len, "%s", s); else snprintf(name, len, "%s:%s", type, s); return (name); -- cgit v1.2.1 From 19fac80017eee9758d8109ab94796231d4995f33 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 17 Mar 2017 01:15:06 -0400 Subject: WT-3224 Prevent splits in LSM primaries (#3335) Move lsm_primary check near evict_disabled check. 
The assertion was caused by `WT_BTREE_NO_RECONCILE`, which allows in-memory splits even when eviction is disabled. Rename that flag `WT_BTREE_ALLOW_SPLITS` for clarity. --- src/btree/bt_read.c | 3 ++- src/btree/bt_sync.c | 4 ++-- src/evict/evict_page.c | 4 ++-- src/include/btree.h | 12 ++++++------ src/include/btree.i | 1 - 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index b170a9fb900..64874547b9c 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -592,8 +592,9 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags */ if (LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(session, WT_SESSION_NO_EVICTION) || + btree->lsm_primary || (btree->evict_disabled > 0 && - !F_ISSET(btree, WT_BTREE_NO_RECONCILE))) + !F_ISSET(btree, WT_BTREE_ALLOW_SPLITS))) goto skip_evict; /* diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index cdb27752fb7..ead6ccc4ac0 100644 --- a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -188,9 +188,9 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) * to grow significantly larger than the configured maximum * size. */ - F_SET(btree, WT_BTREE_NO_RECONCILE); + F_SET(btree, WT_BTREE_ALLOW_SPLITS); ret = __wt_evict_file_exclusive_on(session); - F_CLR(btree, WT_BTREE_NO_RECONCILE); + F_CLR(btree, WT_BTREE_ALLOW_SPLITS); WT_ERR(ret); __wt_evict_file_exclusive_off(session); diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index 5b17a78a4dd..85689efd0b1 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -480,8 +480,8 @@ __evict_review( if (LF_ISSET(WT_EVICT_INMEM_SPLIT)) return (__wt_split_insert(session, ref)); - /* We are done if reconciliation is disabled. */ - if (F_ISSET(S2BT(session), WT_BTREE_NO_RECONCILE)) + /* If splits are the only permitted operation, we're done. 
*/ + if (F_ISSET(S2BT(session), WT_BTREE_ALLOW_SPLITS)) return (EBUSY); } diff --git a/src/include/btree.h b/src/include/btree.h index 15a68474fdf..88312f408cc 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -157,14 +157,14 @@ struct __wt_btree { WT_SPINLOCK flush_lock; /* Lock to flush the tree's pages */ /* Flags values up to 0xff are reserved for WT_DHANDLE_* */ -#define WT_BTREE_BULK 0x000100 /* Bulk-load handle */ -#define WT_BTREE_CLOSED 0x000200 /* Handle closed */ -#define WT_BTREE_IGNORE_CACHE 0x000400 /* Cache-resident object */ -#define WT_BTREE_IN_MEMORY 0x000800 /* Cache-resident object */ -#define WT_BTREE_LOOKASIDE 0x001000 /* Look-aside table */ +#define WT_BTREE_ALLOW_SPLITS 0x000100 /* Allow splits, even with no evict */ +#define WT_BTREE_BULK 0x000200 /* Bulk-load handle */ +#define WT_BTREE_CLOSED 0x000400 /* Handle closed */ +#define WT_BTREE_IGNORE_CACHE 0x000800 /* Cache-resident object */ +#define WT_BTREE_IN_MEMORY 0x001000 /* Cache-resident object */ +#define WT_BTREE_LOOKASIDE 0x002000 /* Look-aside table */ #define WT_BTREE_NO_CHECKPOINT 0x004000 /* Disable checkpoints */ #define WT_BTREE_NO_LOGGING 0x008000 /* Disable logging */ -#define WT_BTREE_NO_RECONCILE 0x010000 /* Allow splits, even with no evict */ #define WT_BTREE_REBALANCE 0x020000 /* Handle is for rebalance */ #define WT_BTREE_SALVAGE 0x040000 /* Handle is for salvage */ #define WT_BTREE_SKIP_CKPT 0x080000 /* Handle skipped checkpoint */ diff --git a/src/include/btree.i b/src/include/btree.i index c0c5c7c5a8d..eefc2db075d 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1229,7 +1229,6 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) * data in the last skiplist on the page. Split if there are enough * items and the skiplist does not fit within a single disk page. */ - ins_head = page->type == WT_PAGE_ROW_LEAF ? (page->entries == 0 ? 
WT_ROW_INSERT_SMALLEST(page) : -- cgit v1.2.1 From 5cf626fda029c966c5c1eea7916fa7e8d12e6330 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 17 Mar 2017 15:50:21 -0400 Subject: WT-3227 Python test suite inserts unnecessary whitespace in error output. (#3338) The Python test suite uses "XXX: " as its error prefix, and the WiredTiger error routines append a comma and space after the error prefix in error messages. This means the error messages come out "XXX: , YYY". Remove the colon and space from the declared error_prefix so the error messages come out "XXX, YYY". --- test/suite/test_compact02.py | 2 +- test/suite/test_encrypt04.py | 2 +- test/suite/test_shared_cache01.py | 2 +- test/suite/test_shared_cache02.py | 2 +- test/suite/test_txn07.py | 2 +- test/suite/wttest.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/suite/test_compact02.py b/test/suite/test_compact02.py index 7af76b5fd58..803600eea14 100644 --- a/test/suite/test_compact02.py +++ b/test/suite/test_compact02.py @@ -99,7 +99,7 @@ class test_compact02(wttest.WiredTigerTestCase): def ConnectionOpen(self, cacheSize): self.home = '.' 
conn_params = 'create,' + \ - cacheSize + ',error_prefix="%s: ",' % self.shortid() + \ + cacheSize + ',error_prefix="%s",' % self.shortid() + \ 'statistics=(all),' + \ 'eviction_dirty_target=99,eviction_dirty_trigger=99' try: diff --git a/test/suite/test_encrypt04.py b/test/suite/test_encrypt04.py index 17777fc9564..19c0b85d427 100644 --- a/test/suite/test_encrypt04.py +++ b/test/suite/test_encrypt04.py @@ -113,7 +113,7 @@ class test_encrypt04(wttest.WiredTigerTestCase, suite_subprocess): completed = False try: conn = self.wiredtiger_open(dir, - 'create,error_prefix="{0}: ",{1}{2}'.format( + 'create,error_prefix="{0}",{1}{2}'.format( self.shortid(), encarg, extarg)) except (BaseException) as err: # Capture the recognizable error created by rotn diff --git a/test/suite/test_shared_cache01.py b/test/suite/test_shared_cache01.py index 70560a625ee..c3bd946cc4b 100644 --- a/test/suite/test_shared_cache01.py +++ b/test/suite/test_shared_cache01.py @@ -73,7 +73,7 @@ class test_shared_cache01(wttest.WiredTigerTestCase): os.mkdir(name) next_conn = self.wiredtiger_open( name, - 'create,error_prefix="' + self.shortid() + ': "' + + 'create,error_prefix="%s",' % self.shortid() + pool_opts + extra_opts) self.conns.append(next_conn) self.sessions.append(next_conn.open_session(None)) diff --git a/test/suite/test_shared_cache02.py b/test/suite/test_shared_cache02.py index 7cde6c86695..67f9bf7c6b7 100644 --- a/test/suite/test_shared_cache02.py +++ b/test/suite/test_shared_cache02.py @@ -73,7 +73,7 @@ class test_shared_cache02(wttest.WiredTigerTestCase): os.mkdir(name) next_conn = self.wiredtiger_open( name, - 'create,error_prefix="' + self.shortid() + ': "' + + 'create,error_prefix="%s",' % self.shortid() + pool_opts + extra_opts) self.conns.append(next_conn) self.sessions.append(next_conn.open_session(None)) diff --git a/test/suite/test_txn07.py b/test/suite/test_txn07.py index e2986fb999a..e26cf5aaaea 100644 --- a/test/suite/test_txn07.py +++ b/test/suite/test_txn07.py @@ -76,7 
+76,7 @@ class test_txn07(wttest.WiredTigerTestCase, suite_subprocess): def conn_config(self): return 'log=(archive=false,enabled,file_max=%s,' % self.logmax + \ 'compressor=%s)' % self.compress + \ - ',create,error_prefix="%s: ",' % self.shortid() + \ + ',create,error_prefix="%s",' % self.shortid() + \ "statistics=(fast)," + \ 'transaction_sync="%s",' % \ self.sync_list[self.scenario_number % len(self.sync_list)] diff --git a/test/suite/wttest.py b/test/suite/wttest.py index 0dce51f07d5..e91838544b9 100644 --- a/test/suite/wttest.py +++ b/test/suite/wttest.py @@ -302,7 +302,7 @@ class WiredTigerTestCase(unittest.TestCase): # In case the open starts additional threads, flush first to # avoid confusion. sys.stdout.flush() - conn_param = 'create,error_prefix="%s: ",%s' % (self.shortid(), config) + conn_param = 'create,error_prefix="%s",%s' % (self.shortid(), config) try: conn = self.wiredtiger_open(home, conn_param) except wiredtiger.WiredTigerError as e: -- cgit v1.2.1 From 89c063c2acb0f901725f0cd838503c983687a49f Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Mon, 20 Mar 2017 16:58:17 +1100 Subject: WT-3196 Prevent eviction in LSM primaries after they are flushed. (#3336) Once an LSM primary is known to be on disk, we expect readers to use the checkpoint. The original page image for the primary will then be discarded by an LSM worker thread. We previously allowed the LSM primary to be evicted in between so that eviction workers can deal with cache pressure ahead of the LSM worker threads discarding the chunk. However, that leads to cases where application threads end up evicting a 100MB page, and also means that discarding the chunk needs to worry about split generations (the cause of the assertion failure here). The solution suggested here is simple: never enable eviction in LSM primaries, which also means we never need to fix up cache accounting. 
--- src/lsm/lsm_work_unit.c | 69 +++---------------------------------------------- 1 file changed, 3 insertions(+), 66 deletions(-) diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index 358c43eab96..10b85d573aa 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -255,51 +255,6 @@ err: return (ret); } -/* - * __lsm_switch_primary_off -- - * Switch when a btree handle is no longer the current primary chunk of - * an LSM tree. - */ -static void -__lsm_switch_primary_off(WT_SESSION_IMPL *session) -{ - WT_BTREE *btree; - WT_CACHE *cache; - WT_PAGE *child, *root; - WT_PAGE_INDEX *pindex; - WT_REF *first; - size_t size; - - btree = S2BT(session); - cache = S2C(session)->cache; - root = btree->root.page; - pindex = WT_INTL_INDEX_GET_SAFE(root); - - /* Diagnostic: assert we've never split. */ - WT_ASSERT(session, pindex->entries == 1); - - /* - * We're reaching down into the page without a hazard pointer, - * but that's OK because we know that no-eviction is set so the - * page can't disappear. - * - * While this tree was the primary, its dirty bytes were not - * included in the cache accounting. Fix that now before we - * open it up for eviction. - */ - first = pindex->index[0]; - child = first->page; - if (first->state == WT_REF_MEM && - child->type == WT_PAGE_ROW_LEAF && __wt_page_is_modified(child)) { - size = child->modify->bytes_dirty; - (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); - (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); - } - - /* Configure eviction. */ - __wt_evict_file_exclusive_off(session); -} - /* * __wt_lsm_checkpoint_chunk -- * Flush a single LSM chunk to disk. 
@@ -308,7 +263,6 @@ int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) { - WT_BTREE *btree; WT_DECL_RET; WT_TXN_ISOLATION saved_isolation; bool flush_set, release_btree; @@ -322,9 +276,8 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && !F_ISSET(chunk, WT_LSM_CHUNK_STABLE) && !chunk->evicted) { - WT_WITH_HANDLE_LIST_WRITE_LOCK(session, - ret = __lsm_discard_handle(session, chunk->uri, NULL)); - if (ret == 0) + if ((ret = + __lsm_discard_handle(session, chunk->uri, NULL)) == 0) chunk->evicted = 1; else if (ret == EBUSY) ret = 0; @@ -397,20 +350,6 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, if (ret != 0) WT_ERR_MSG(session, ret, "LSM checkpoint"); - /* - * If the chunk is the lsm primary, clear the no-eviction flag so it can - * be evicted and eventually closed. Only do once, and only do after the - * checkpoint has succeeded: otherwise, accessing the leaf page during - * the checkpoint can trigger forced eviction. - * - * We don't have to worry about races here, we're single-threaded. - */ - btree = S2BT(session); - if (btree->lsm_primary) { - __lsm_switch_primary_off(session); - btree->lsm_primary = false; - } - release_btree = false; WT_ERR(__wt_session_release_btree(session)); @@ -569,9 +508,7 @@ __lsm_drop_file(WT_SESSION_IMPL *session, const char *uri) * * This will fail with EBUSY if the file is still in use. */ - WT_WITH_HANDLE_LIST_WRITE_LOCK(session, - ret = __lsm_discard_handle(session, uri, WT_CHECKPOINT)); - WT_RET(ret); + WT_RET(__lsm_discard_handle(session, uri, WT_CHECKPOINT)); /* * Take the schema lock for the drop operation. Since __wt_schema_drop -- cgit v1.2.1 From cfdf4394aa39209d402a9006661810cda3bdb38d Mon Sep 17 00:00:00 2001 From: sueloverso Date: Mon, 20 Mar 2017 15:04:18 -0400 Subject: WT-2990 Restore use of dhandle lock in LSM. 
(#3342) --- src/lsm/lsm_work_unit.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index 10b85d573aa..e6a29666094 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -276,8 +276,9 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) && !F_ISSET(chunk, WT_LSM_CHUNK_STABLE) && !chunk->evicted) { - if ((ret = - __lsm_discard_handle(session, chunk->uri, NULL)) == 0) + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, + ret = __lsm_discard_handle(session, chunk->uri, NULL)); + if (ret == 0) chunk->evicted = 1; else if (ret == EBUSY) ret = 0; @@ -508,7 +509,9 @@ __lsm_drop_file(WT_SESSION_IMPL *session, const char *uri) * * This will fail with EBUSY if the file is still in use. */ - WT_RET(__lsm_discard_handle(session, uri, WT_CHECKPOINT)); + WT_WITH_HANDLE_LIST_WRITE_LOCK(session, + ret = __lsm_discard_handle(session, uri, WT_CHECKPOINT)); + WT_RET(ret); /* * Take the schema lock for the drop operation. Since __wt_schema_drop -- cgit v1.2.1 From 620398dc9a90b401aa9e4437c834bfbb2a6d9a6d Mon Sep 17 00:00:00 2001 From: Sulabh Mahajan Date: Thu, 23 Mar 2017 11:57:44 +1100 Subject: WT-3202 Add in_memory config opt, do not reopen connection if db is in_memory (#3341) --- bench/wtperf/wtperf.c | 13 ++++++++++++- bench/wtperf/wtperf_opt.i | 2 ++ src/docs/wtperf.dox | 2 ++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 7f5e5ad3373..b5aff21bdbc 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -1655,6 +1655,9 @@ close_reopen(WTPERF *wtperf) opts = wtperf->opts; + if (opts->in_memory) + return (0); + if (!opts->readonly && !opts->reopen_connection) return (0); /* @@ -2568,7 +2571,8 @@ main(int argc, char *argv[]) /* Concatenate non-default configuration strings. 
*/ if ((opts->verbose > 1 && strlen(debug_cconfig) != 0) || user_cconfig != NULL || opts->session_count_idle > 0 || - wtperf->compress_ext != NULL || wtperf->async_config != NULL) { + wtperf->compress_ext != NULL || wtperf->async_config != NULL || + opts->in_memory) { req_len = 20; req_len += wtperf->async_config != NULL ? strlen(wtperf->async_config) : 0; @@ -2583,6 +2587,7 @@ main(int argc, char *argv[]) opts->session_count_idle + wtperf->workers_cnt + opts->populate_threads + 10); } + req_len += opts->in_memory ? strlen("in_memory=true") : 0; req_len += user_cconfig != NULL ? strlen(user_cconfig) : 0; req_len += debug_cconfig != NULL ? strlen(debug_cconfig) : 0; cc_buf = dmalloc(req_len); @@ -2603,6 +2608,12 @@ main(int argc, char *argv[]) append_comma, wtperf->compress_ext); append_comma = ","; } + if (opts->in_memory) { + pos += (size_t)snprintf( + cc_buf + pos, req_len - pos, "%s%s", + append_comma, "in_memory=true"); + append_comma = ","; + } if (sess_cfg != NULL && strlen(sess_cfg) != 0) { pos += (size_t)snprintf( cc_buf + pos, req_len - pos, "%s%s", diff --git a/bench/wtperf/wtperf_opt.i b/bench/wtperf/wtperf_opt.i index 63cef4c28fb..90f70457407 100644 --- a/bench/wtperf/wtperf_opt.i +++ b/bench/wtperf/wtperf_opt.i @@ -110,6 +110,8 @@ DEF_OPT_AS_UINT32(database_count, 1, DEF_OPT_AS_BOOL(drop_tables, 0, "Whether to drop all tables at the end of the run, and report time taken" " to do the drop.") +DEF_OPT_AS_BOOL(in_memory, 0, + "Whether to create the database in-memory.") DEF_OPT_AS_UINT32(icount, 5000, "number of records to initially populate. If multiple tables are " "configured the count is spread evenly across all tables.") diff --git a/src/docs/wtperf.dox b/src/docs/wtperf.dox index 2eac0fef3f4..6bdcf5f4f8d 100644 --- a/src/docs/wtperf.dox +++ b/src/docs/wtperf.dox @@ -167,6 +167,8 @@ do population phase; false to use existing database number of WiredTiger databases to use. 
Each database will execute the workload using a separate home directory and complete set of worker threads @par drop_tables (boolean, default=false) Whether to drop all tables at the end of the run, and report time taken to do the drop. +@par in_memory (boolean, default=false) +Whether to create the database in-memory. @par icount (unsigned int, default=5000) number of records to initially populate. If multiple tables are configured the count is spread evenly across all tables. @par idle_table_cycle (unsigned int, default=0) -- cgit v1.2.1 From 6bd63027a6fd00db3f0f379acb929c22cd1b7f6f Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 23 Mar 2017 02:27:54 -0400 Subject: SERVER-28194 Missing WiredTiger.turtle file loses data (#3337) There's a two step process on Windows to rename files (including the turtle file), remove the original and then move the replacement into place -- a DeleteFileW followed by a MoveFileW. If we crash in the middle (and in SERVER-28194, it looks like there's a weirder failure mode, where the DeleteFileW succeeded, but the file was still there), we can be left without a turtle file, which will lose all of the data in the database. * Add the MOVEFILE_WRITE_THROUGH flag to the MoveFileEx call. If we somehow end up in a copy-then-delete path, that flag adds a disk flush after the copy phase, so the window of vulnerability is as short as possible. 
--- dist/s_string.ok | 5 +++-- src/os_win/os_fs.c | 23 ++++++++++------------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/dist/s_string.ok b/dist/s_string.ok index 39b6b163cd9..1f7f7d9fd3a 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -225,8 +225,10 @@ MEMALIGN MERCHANTABILITY METADATA MONGODB +MOVEFILE MRXB MRXBOPC +MSDN MSVC MULTI MULTIBLOCK @@ -240,8 +242,7 @@ Metadata Mewhort Mitzenmacher MongoDB -MoveFile -MoveFileW +MoveFileExW Multi MultiByteToWideChar Multithreaded diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c index 2f76fff04a5..5cf47ea5763 100644 --- a/src/os_win/os_fs.c +++ b/src/os_win/os_fs.c @@ -87,22 +87,19 @@ __win_fs_rename(WT_FILE_SYSTEM *file_system, WT_ERR(__wt_to_utf16_string(session, to, &to_wide)); /* - * Check if file exists since Windows does not override the file if - * it exists. + * We want an atomic rename, but that's not guaranteed by MoveFileExW + * (or by any MSDN API). Don't set the MOVEFILE_COPY_ALLOWED flag to + * prevent the system from falling back to a copy and delete process. + * Do set the MOVEFILE_WRITE_THROUGH flag so the window is as small + * as possible, just in case. WiredTiger renames are done in a single + * directory and we expect that to be an atomic metadata update on any + * modern filesystem. 
*/ - if (GetFileAttributesW(to_wide->data) != INVALID_FILE_ATTRIBUTES) - if (DeleteFileW(to_wide->data) == FALSE) { - windows_error = __wt_getlasterror(); - __wt_errx(session, - "%s: file-rename: DeleteFileW: %s", - to, __wt_formatmessage(session, windows_error)); - WT_ERR(__wt_map_windows_error(windows_error)); - } - - if (MoveFileW(from_wide->data, to_wide->data) == FALSE) { + if (MoveFileExW(from_wide->data, to_wide->data, + MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH) == FALSE) { windows_error = __wt_getlasterror(); __wt_errx(session, - "%s to %s: file-rename: MoveFileW: %s", + "%s to %s: file-rename: MoveFileExW: %s", from, to, __wt_formatmessage(session, windows_error)); WT_ERR(__wt_map_windows_error(windows_error)); } -- cgit v1.2.1 From 81df9eadd01427173e7b14525f53723a33a7235e Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 23 Mar 2017 21:16:42 -0400 Subject: WT-3234 Update WiredTiger build for clang 4.0. (#3345) * Update WiredTiger build for clang 4.0. ex_all.c:852:7: error: possible misuse of comma operator here [-Werror,-Wcomma] p1++, p2++; ^ ex_all.c:852:3: note: cast expression to void to silence warning p1++, p2++; ^~~~ (void)( ) 1 error generated. * wtperf.c:2670:4: error: code will never be executed [-Werror,-Wunreachable-code] pos += (size_t)snprintf( ^~~ wtperf.c:2669:23: note: silence by adding parentheses to mark code as explicitly dead if (opts->verbose > 1 && strlen(debug_tconfig) != 0) ^ /* DISABLES CODE */ ( ) wtperf.c:2630:4: error: code will never be executed [-Werror,-Wunreachable-code] pos += (size_t)snprintf( ^~~ wtperf.c:2629:23: note: silence by adding parentheses to mark code as explicitly dead if (opts->verbose > 1 && strlen(debug_cconfig) != 0) ^ /* DISABLES CODE */ ( ) 2 errors generated. 
--- bench/wtperf/wtperf.c | 18 ++---------------- examples/c/ex_all.c | 4 ++-- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index b5aff21bdbc..772dedac8c8 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -32,9 +32,6 @@ #define DEFAULT_HOME "WT_TEST" #define DEFAULT_MONITOR_DIR "WT_TEST" -static const char * const debug_cconfig = ""; -static const char * const debug_tconfig = ""; - static void *checkpoint_worker(void *); static int drop_all_tables(WTPERF *); static int execute_populate(WTPERF *); @@ -2569,8 +2566,7 @@ main(int argc, char *argv[]) __wt_stream_set_line_buffer(stdout); /* Concatenate non-default configuration strings. */ - if ((opts->verbose > 1 && strlen(debug_cconfig) != 0) || - user_cconfig != NULL || opts->session_count_idle > 0 || + if (user_cconfig != NULL || opts->session_count_idle > 0 || wtperf->compress_ext != NULL || wtperf->async_config != NULL || opts->in_memory) { req_len = 20; @@ -2589,7 +2585,6 @@ main(int argc, char *argv[]) } req_len += opts->in_memory ? strlen("in_memory=true") : 0; req_len += user_cconfig != NULL ? strlen(user_cconfig) : 0; - req_len += debug_cconfig != NULL ? strlen(debug_cconfig) : 0; cc_buf = dmalloc(req_len); pos = 0; @@ -2626,23 +2621,18 @@ main(int argc, char *argv[]) append_comma, user_cconfig); append_comma = ","; } - if (opts->verbose > 1 && strlen(debug_cconfig) != 0) - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", - append_comma, debug_cconfig); if (strlen(cc_buf) != 0 && (ret = config_opt_name_value(wtperf, "conn_config", cc_buf)) != 0) goto err; } - if ((opts->verbose > 1 && strlen(debug_tconfig) != 0) || opts->index || + if (opts->index || user_tconfig != NULL || wtperf->compress_table != NULL) { req_len = 20; req_len += wtperf->compress_table != NULL ? strlen(wtperf->compress_table) : 0; req_len += opts->index ? strlen(INDEX_COL_NAMES) : 0; req_len += user_tconfig != NULL ? 
strlen(user_tconfig) : 0; - req_len += debug_tconfig != NULL ? strlen(debug_tconfig) : 0; tc_buf = dmalloc(req_len); pos = 0; @@ -2666,10 +2656,6 @@ main(int argc, char *argv[]) append_comma, user_tconfig); append_comma = ","; } - if (opts->verbose > 1 && strlen(debug_tconfig) != 0) - pos += (size_t)snprintf( - tc_buf + pos, req_len - pos, "%s%s", - append_comma, debug_tconfig); if (strlen(tc_buf) != 0 && (ret = config_opt_name_value(wtperf, "table_config", tc_buf)) != 0) diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c index 8a1533011b2..82620673fe1 100644 --- a/examples/c/ex_all.c +++ b/examples/c/ex_all.c @@ -848,8 +848,8 @@ my_compare(WT_COLLATOR *collator, WT_SESSION *session, p1 = (const char *)value1->data; p2 = (const char *)value2->data; - while (*p1 != '\0' && *p1 == *p2) - p1++, p2++; + for (; *p1 != '\0' && *p1 == *p2; ++p1, ++p2) + ; *cmp = (int)*p2 - (int)*p1; return (0); -- cgit v1.2.1 From e4edaa7b73ca8583506f23a0c6fe701d6213d836 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 23 Mar 2017 21:39:38 -0400 Subject: WT-3228 Remove with overwrite shouldn't return WT_NOTFOUND (#3339) * Table cursors with overwrite configured wrongly treat not-found as an error, return success instead. * The LSM code clears WT_CURSTD_KEY_SET on unsuccessful searches, which breaks table cursors with indices doing searches on the set of cursors in order to delete old index keys, because there's no key set when it's time to do the update. 
--- src/cursor/cur_table.c | 12 ++++++++ src/lsm/lsm_cursor.c | 6 ++-- test/suite/test_overwrite.py | 67 +++++++++++++++++++++++++++++--------------- 3 files changed, 59 insertions(+), 26 deletions(-) diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c index ef2c0ac5163..3b72bb0730f 100644 --- a/src/cursor/cur_table.c +++ b/src/cursor/cur_table.c @@ -625,13 +625,25 @@ __curtable_remove(WT_CURSOR *cursor) /* Find the old record so it can be removed from indices */ if (ctable->table->nindices > 0) { APPLY_CG(ctable, search); + if (ret == WT_NOTFOUND) + goto notfound; WT_ERR(ret); WT_ERR(__apply_idx(ctable, offsetof(WT_CURSOR, remove), false)); } APPLY_CG(ctable, remove); + if (ret == WT_NOTFOUND) + goto notfound; WT_ERR(ret); +notfound: + /* + * If the cursor is configured to overwrite and the record is not found, + * that is exactly what we want. + */ + if (ret == WT_NOTFOUND && F_ISSET(primary, WT_CURSTD_OVERWRITE)) + ret = 0; + /* * If the cursor was positioned, it stays positioned with a key but no * no value, otherwise, there's no position, key or value. This isn't diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 2a34240de46..3f0b6df8eb0 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -1250,10 +1250,10 @@ __clsm_lookup(WT_CURSOR_LSM *clsm, WT_ITEM *value) WT_ERR(WT_NOTFOUND); done: -err: F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); - if (ret == 0) { - clsm->current = c; +err: if (ret == 0) { + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); F_SET(cursor, WT_CURSTD_KEY_INT); + clsm->current = c; if (value == &cursor->value) F_SET(cursor, WT_CURSTD_VALUE_INT); } else if (c != NULL) diff --git a/test/suite/test_overwrite.py b/test/suite/test_overwrite.py index 4739abaa578..c894de99bd0 100644 --- a/test/suite/test_overwrite.py +++ b/test/suite/test_overwrite.py @@ -27,32 +27,47 @@ # OTHER DEALINGS IN THE SOFTWARE. 
import wiredtiger, wttest -from wtdataset import SimpleDataSet +from wtdataset import SimpleDataSet, SimpleIndexDataSet +from wtdataset import SimpleLSMDataSet, ComplexDataSet, ComplexLSMDataSet from wtscenario import make_scenarios # test_overwrite.py # cursor overwrite configuration method class test_overwrite(wttest.WiredTigerTestCase): name = 'overwrite' - scenarios = make_scenarios([ - ('file-r', dict(type='file:', keyfmt='r', dataset=SimpleDataSet)), - ('file-S', dict(type='file:', keyfmt='S', dataset=SimpleDataSet)), - ('lsm-S', dict(type='lsm:', keyfmt='S', dataset=SimpleDataSet)), - ('table-r', dict(type='table:', keyfmt='r', dataset=SimpleDataSet)), - ('table-S', dict(type='table:', keyfmt='S', dataset=SimpleDataSet)), - ]) + keyfmt = [ + ('integer', dict(keyfmt='i')), + ('recno', dict(keyfmt='r')), + ('string', dict(keyfmt='S')), + ] + types = [ + ('file', dict(uri='file:', ds=SimpleDataSet)), + ('lsm', dict(uri='lsm:', ds=SimpleDataSet)), + ('table-complex', dict(uri='table:', ds=ComplexDataSet)), + ('table-complex-lsm', dict(uri='table:', ds=ComplexLSMDataSet)), + ('table-index', dict(uri='table:', ds=SimpleIndexDataSet)), + ('table-simple', dict(uri='table:', ds=SimpleDataSet)), + ('table-simple-lsm', dict(uri='table:', ds=SimpleLSMDataSet)), + ] + scenarios = make_scenarios(types, keyfmt) + def skip(self): + return self.keyfmt == 'r' and \ + (self.ds.is_lsm() or self.uri == 'lsm') # Confirm a cursor configured with/without overwrite correctly handles # non-existent records during insert, remove and update operations. def test_overwrite_insert(self): - uri = self.type + self.name - ds = self.dataset(self, uri, 100, key_format=self.keyfmt) + if self.skip(): + return + + uri = self.uri + self.name + ds = self.ds(self, uri, 100, key_format=self.keyfmt) ds.populate() # Insert of an existing record with overwrite off fails. 
cursor = self.session.open_cursor(uri, None, "overwrite=false") cursor.set_key(ds.key(5)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1000)) self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.insert()) # One additional test for the insert method: duplicate the cursor with @@ -63,30 +78,33 @@ class test_overwrite(wttest.WiredTigerTestCase): cursor = self.session.open_cursor(uri, None, "overwrite=false") cursor.set_key(ds.key(5)) dupc = self.session.open_cursor(None, cursor, "overwrite=true") - dupc.set_value('XXXXXXXXXX') + dupc.set_value(ds.value(1001)) self.assertEquals(dupc.insert(), 0) # Insert of an existing record with overwrite on succeeds. cursor = self.session.open_cursor(uri, None) cursor.set_key(ds.key(6)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1002)) self.assertEquals(cursor.insert(), 0) # Insert of a non-existent record with overwrite off succeeds. cursor = self.session.open_cursor(uri, None, "overwrite=false") cursor.set_key(ds.key(200)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1003)) self.assertEquals(cursor.insert(), 0) # Insert of a non-existent record with overwrite on succeeds. cursor = self.session.open_cursor(uri, None) cursor.set_key(ds.key(201)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1004)) self.assertEquals(cursor.insert(), 0) def test_overwrite_remove(self): - uri = self.type + self.name - ds = self.dataset(self, uri, 100, key_format=self.keyfmt) + if self.skip(): + return + + uri = self.uri + self.name + ds = self.ds(self, uri, 100, key_format=self.keyfmt) ds.populate() # Remove of an existing record with overwrite off succeeds. 
@@ -110,32 +128,35 @@ class test_overwrite(wttest.WiredTigerTestCase): self.assertEquals(cursor.remove(), 0) def test_overwrite_update(self): - uri = self.type + self.name - ds = self.dataset(self, uri, 100, key_format=self.keyfmt) + if self.skip(): + return + + uri = self.uri + self.name + ds = self.ds(self, uri, 100, key_format=self.keyfmt) ds.populate() # Update of an existing record with overwrite off succeeds. cursor = self.session.open_cursor(uri, None, "overwrite=false") cursor.set_key(ds.key(5)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1005)) self.assertEquals(cursor.update(), 0) # Update of an existing record with overwrite on succeeds. cursor = self.session.open_cursor(uri, None) cursor.set_key(ds.key(6)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1006)) self.assertEquals(cursor.update(), 0) # Update of a non-existent record with overwrite off fails. cursor = self.session.open_cursor(uri, None, "overwrite=false") cursor.set_key(ds.key(200)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1007)) self.assertEquals(cursor.update(), wiredtiger.WT_NOTFOUND) # Update of a non-existent record with overwrite on succeeds. 
cursor = self.session.open_cursor(uri, None) cursor.set_key(ds.key(201)) - cursor.set_value('XXXXXXXXXX') + cursor.set_value(ds.value(1008)) self.assertEquals(cursor.update(), 0) if __name__ == '__main__': -- cgit v1.2.1 From 54909d4c49019e6d9d007d3783cb8f3dbbccba84 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 24 Mar 2017 01:02:52 -0400 Subject: WT-98 Update the current cursor value without a search (#3330) --- src/btree/bt_curnext.c | 11 +- src/btree/bt_curprev.c | 11 +- src/btree/bt_cursor.c | 324 +++++++++++++++++++++++++++---------- src/btree/bt_random.c | 7 +- src/btree/bt_ret.c | 21 ++- src/cursor/cur_file.c | 135 ++++++++-------- src/cursor/cur_join.c | 4 +- src/evict/evict_lru.c | 4 +- src/include/cursor.i | 25 +-- src/include/extern.h | 4 +- src/include/packing.i | 2 +- src/include/wiredtiger.in | 4 + src/log/log.c | 4 +- src/lsm/lsm_cursor.c | 35 ++-- src/lsm/lsm_merge.c | 2 +- src/lsm/lsm_meta.c | 2 +- src/lsm/lsm_stat.c | 4 +- src/schema/schema_create.c | 2 +- src/schema/schema_worker.c | 4 +- src/session/session_api.c | 7 +- src/session/session_compact.c | 2 +- src/txn/txn.c | 2 +- src/txn/txn_ckpt.c | 2 +- test/format/config.c | 47 +++--- test/format/ops.c | 362 ++++++++++++++++++++++++++++-------------- test/suite/test_truncate01.py | 1 + 26 files changed, 663 insertions(+), 365 deletions(-) diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c index ba5fceae7c7..21e575ffca9 100644 --- a/src/btree/bt_curnext.c +++ b/src/btree/bt_curnext.c @@ -579,20 +579,20 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt) int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) { + WT_CURSOR *cursor; WT_DECL_RET; WT_PAGE *page; WT_SESSION_IMPL *session; uint32_t flags; bool newpage; + cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cbt->iface.session; WT_STAT_CONN_INCR(session, cursor_next); WT_STAT_DATA_INCR(session, cursor_next); - flags = WT_READ_SKIP_INTL; /* Tree walk flags. 
*/ - if (truncating) - LF_SET(WT_READ_TRUNCATE); + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); WT_RET(__cursor_func_init(cbt, false)); @@ -608,6 +608,9 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) * found. Then, move to the next page, until we reach the end of the * file. */ + flags = WT_READ_SKIP_INTL; /* tree walk flags */ + if (truncating) + LF_SET(WT_READ_TRUNCATE); for (newpage = false;; newpage = true) { page = cbt->ref == NULL ? NULL : cbt->ref->page; @@ -676,6 +679,8 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) if (ret == 0) WT_ERR(__wt_cursor_key_order_check(session, cbt, true)); #endif + if (ret == 0) + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); err: if (ret != 0) WT_TRET(__cursor_reset(cbt)); diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c index 602c01b60eb..bf4bdad6529 100644 --- a/src/btree/bt_curprev.c +++ b/src/btree/bt_curprev.c @@ -535,20 +535,20 @@ new_insert: if ((ins = cbt->ins) != NULL) { int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) { + WT_CURSOR *cursor; WT_DECL_RET; WT_PAGE *page; WT_SESSION_IMPL *session; uint32_t flags; bool newpage; + cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cbt->iface.session; WT_STAT_CONN_INCR(session, cursor_prev); WT_STAT_DATA_INCR(session, cursor_prev); - flags = WT_READ_PREV | WT_READ_SKIP_INTL; /* Tree walk flags. */ - if (truncating) - LF_SET(WT_READ_TRUNCATE); + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); WT_RET(__cursor_func_init(cbt, false)); @@ -564,6 +564,9 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) * found. Then, move to the previous page, until we reach the start * of the file. */ + flags = WT_READ_PREV | WT_READ_SKIP_INTL; /* tree walk flags */ + if (truncating) + LF_SET(WT_READ_TRUNCATE); for (newpage = false;; newpage = true) { page = cbt->ref == NULL ? 
NULL : cbt->ref->page; @@ -631,6 +634,8 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) if (ret == 0) WT_ERR(__wt_cursor_key_order_check(session, cbt, false)); #endif + if (ret == 0) + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); err: if (ret != 0) WT_TRET(__cursor_reset(cbt)); diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 48ae1ad6d76..944e276fc01 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -9,32 +9,46 @@ #include "wt_internal.h" /* - * WT_CURFILE_OP_XXX - * If we're going to return an error, we need to restore the cursor to - * a valid state, the upper-level cursor code is likely to retry. The macros - * here are called to save and restore that state. + * When returning an error, we need to restore the cursor to a valid state, the + * upper-level cursor code is likely to retry. This structure and the associated + * functions are used save and restore the cursor state. */ -#define WT_CURFILE_OP_DECL \ - WT_ITEM __key_copy; \ - WT_ITEM __value_copy; \ - uint64_t __recno; \ - uint32_t __flags -#define WT_CURFILE_OP_PUSH do { \ - WT_ITEM_SET(__key_copy, cursor->key); \ - WT_ITEM_SET(__value_copy, cursor->value); \ - __recno = cursor->recno; \ - __flags = cursor->flags; \ -} while (0) -#define WT_CURFILE_OP_POP do { \ - cursor->recno = __recno; \ - if (FLD_ISSET(__flags, WT_CURSTD_KEY_EXT)) \ - WT_ITEM_SET(cursor->key, __key_copy); \ - if (FLD_ISSET(__flags, WT_CURSTD_VALUE_EXT)) \ - WT_ITEM_SET(cursor->value, __value_copy); \ - F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); \ - F_SET(cursor, \ - FLD_MASK(__flags, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT));\ -} while (0) +typedef struct { + WT_ITEM key; + WT_ITEM value; + uint64_t recno; + uint32_t flags; +} WT_CURFILE_STATE; + +/* + * __cursor_state_save -- + * Save the cursor's external state. 
+ */ +static inline void +__cursor_state_save(WT_CURSOR *cursor, WT_CURFILE_STATE *state) +{ + WT_ITEM_SET(state->key, cursor->key); + WT_ITEM_SET(state->value, cursor->value); + state->recno = cursor->recno; + state->flags = cursor->flags; +} + +/* + * __cursor_state_restore -- + * Restore the cursor's external state. + */ +static inline void +__cursor_state_restore(WT_CURSOR *cursor, WT_CURFILE_STATE *state) +{ + if (F_ISSET(state, WT_CURSTD_KEY_EXT)) + WT_ITEM_SET(cursor->key, state->key); + if (F_ISSET(state, WT_CURSTD_VALUE_EXT)) + WT_ITEM_SET(cursor->value, state->value); + cursor->recno = state->recno; + F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + F_SET(cursor, F_MASK(state, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT)); + +} /* * __cursor_page_pinned -- @@ -377,13 +391,17 @@ __cursor_row_modify( int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) { + WT_CURSOR *cursor; WT_SESSION_IMPL *session; + cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cbt->iface.session; WT_STAT_CONN_INCR(session, cursor_reset); WT_STAT_DATA_INCR(session, cursor_reset); + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + return (__cursor_reset(cbt)); } @@ -395,6 +413,7 @@ int __wt_btcur_search(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; @@ -409,6 +428,15 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_STAT_CONN_INCR(session, cursor_search); WT_STAT_DATA_INCR(session, cursor_search); + __cursor_state_save(cursor, &state); + + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key, then re-save the cursor state. 
+ */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + /* * If we have a page pinned, search it; if we don't have a page pinned, * or the search of the pinned page doesn't find an exact match, search @@ -443,6 +471,8 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) cbt->v = 0; cursor->value.data = &cbt->v; cursor->value.size = 1; + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); } else ret = WT_NOTFOUND; @@ -451,8 +481,10 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_ERR(__wt_cursor_key_order_init(session, cbt)); #endif -err: if (ret != 0) +err: if (ret != 0) { WT_TRET(__cursor_reset(cbt)); + __cursor_state_restore(cursor, &state); + } return (ret); } @@ -464,6 +496,7 @@ int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; @@ -480,6 +513,15 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_STAT_CONN_INCR(session, cursor_search_near); WT_STAT_DATA_INCR(session, cursor_search_near); + __cursor_state_save(cursor, &state); + + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key, then re-save the cursor state. 
+ */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + /* * If we have a row-store page pinned, search it; if we don't have a * page pinned, or the search of the pinned page doesn't find an exact @@ -544,6 +586,8 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) cursor->value.data = &cbt->v; cursor->value.size = 1; exact = 0; + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); } else if ((ret = __wt_btcur_next(cbt, false)) != WT_NOTFOUND) exact = 1; else { @@ -558,15 +602,18 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) exact = -1; } +err: if (ret == 0 && exactp != NULL) + *exactp = exact; + #ifdef HAVE_DIAGNOSTIC if (ret == 0) - WT_ERR(__wt_cursor_key_order_init(session, cbt)); + WT_TRET(__wt_cursor_key_order_init(session, cbt)); #endif -err: if (ret != 0) + if (ret != 0) { WT_TRET(__cursor_reset(cbt)); - if (exactp != NULL && (ret == 0 || ret == WT_NOTFOUND)) - *exactp = exact; + __cursor_state_restore(cursor, &state); + } return (ret); } @@ -578,9 +625,11 @@ int __wt_btcur_insert(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; + bool append_key; btree = cbt->btree; cursor = &cbt->iface; @@ -591,6 +640,8 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_STAT_DATA_INCRV(session, cursor_insert_bytes, cursor->key.size + cursor->value.size); + __cursor_state_save(cursor, &state); + if (btree->type == BTREE_ROW) WT_RET(__cursor_size_chk(session, &cursor->key)); WT_RET(__cursor_size_chk(session, &cursor->value)); @@ -598,7 +649,58 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) /* It's no longer possible to bulk-load into the tree. */ __cursor_disable_bulk(session, btree); -retry: WT_RET(__cursor_func_init(cbt, true)); + /* + * Insert a new record if WT_CURSTD_APPEND configured, (ignoring any + * application set record number). 
Although append can't be configured + * for a row-store, this code would break if it were, and that's owned + * by the upper cursor layer, be cautious. + */ + append_key = + F_ISSET(cursor, WT_CURSTD_APPEND) && btree->type != BTREE_ROW; + + /* + * If inserting with overwrite configured, and positioned to an on-page + * key, the update doesn't require another search. The cursor won't be + * positioned on a page with an external key set, but be sure. Cursors + * configured for append aren't included, regardless of whether or not + * they meet all other criteria. + */ + if (__cursor_page_pinned(cbt) && + F_ISSET_ALL(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_OVERWRITE) && + !append_key) { + WT_ERR(__wt_txn_autocommit_check(session)); + /* + * The cursor position may not be exact (the cursor's comparison + * value not equal to zero). Correct to an exact match so we can + * update whatever we're pointing at. + */ + cbt->compare = 0; + ret = btree->type == BTREE_ROW ? + __cursor_row_modify(session, cbt, false) : + __cursor_col_modify(session, cbt, false); + if (ret == 0) + goto done; + + /* + * The pinned page goes away if we fail for any reason, make + * sure there's a local copy of any key. (Restart could still + * use the pinned page, but that's an unlikely path.) Re-save + * the cursor state: we may retry but eventually fail. + */ + WT_TRET(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + goto err; + } + + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key. Re-save the cursor state: we may retry but + * eventually fail. 
+ */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + +retry: WT_ERR(__cursor_func_init(cbt, true)); if (btree->type == BTREE_ROW) { WT_ERR(__cursor_row_search(session, cbt, NULL, true)); @@ -613,11 +715,11 @@ retry: WT_RET(__cursor_func_init(cbt, true)); ret = __cursor_row_modify(session, cbt, false); } else { /* - * If WT_CURSTD_APPEND is set, insert a new record (ignoring - * the application's record number). The real record number - * is assigned by the serialized append operation. + * Optionally insert a new record (ignoring the application's + * record number). The real record number is allocated by the + * serialized append operation. */ - if (F_ISSET(cursor, WT_CURSTD_APPEND)) + if (append_key) cbt->iface.recno = WT_RECNO_OOB; WT_ERR(__cursor_col_search(session, cbt, NULL)); @@ -634,7 +736,8 @@ retry: WT_RET(__cursor_func_init(cbt, true)); WT_ERR(WT_DUPLICATE_KEY); WT_ERR(__cursor_col_modify(session, cbt, false)); - if (F_ISSET(cursor, WT_CURSTD_APPEND)) + + if (append_key) cbt->iface.recno = cbt->recno; } @@ -644,8 +747,16 @@ err: if (ret == WT_RESTART) { goto retry; } - /* Insert doesn't maintain a position across calls, clear resources. */ +done: /* Insert doesn't maintain a position across calls, clear resources. */ + if (ret == 0) { + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + if (append_key) + F_SET(cursor, WT_CURSTD_KEY_INT); + } WT_TRET(__cursor_reset(cbt)); + if (ret != 0) + __cursor_state_restore(cursor, &state); + return (ret); } @@ -681,16 +792,15 @@ __curfile_update_check(WT_CURSOR_BTREE *cbt) } /* - * __wt_btcur_update_check -- + * __wt_btcur_insert_check -- * Check whether an update would conflict. * - * This can be used to replace WT_CURSOR::insert or WT_CURSOR::update, so - * they only check for conflicts without updating the tree. It is used to - * maintain snapshot isolation for transactions that span multiple chunks - * in an LSM tree. 
+ * This can replace WT_CURSOR::insert, so it only checks for conflicts without + * updating the tree. It is used to maintain snapshot isolation for transactions + * that span multiple chunks in an LSM tree. */ int -__wt_btcur_update_check(WT_CURSOR_BTREE *cbt) +__wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; WT_CURSOR *cursor; @@ -701,14 +811,20 @@ __wt_btcur_update_check(WT_CURSOR_BTREE *cbt) btree = cbt->btree; session = (WT_SESSION_IMPL *)cursor->session; -retry: WT_RET(__cursor_func_init(cbt, true)); + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key. Unlike most of the btree cursor routines, + * we don't have to save/restore the cursor key state, none of the + * work done here changes the key state. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + +retry: WT_ERR(__cursor_func_init(cbt, true)); if (btree->type == BTREE_ROW) { WT_ERR(__cursor_row_search(session, cbt, NULL, true)); - /* - * Just check for conflicts. - */ + /* Just check for conflicts. */ ret = __curfile_update_check(cbt); } else WT_ERR(__wt_illegal_value(session, NULL)); @@ -720,7 +836,10 @@ err: if (ret == WT_RESTART) { } /* Insert doesn't maintain a position across calls, clear resources. 
*/ + if (ret == 0) + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); WT_TRET(__cursor_reset(cbt)); + return (ret); } @@ -732,7 +851,7 @@ int __wt_btcur_remove(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; - WT_CURFILE_OP_DECL; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; @@ -742,26 +861,27 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt) cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; - WT_CURFILE_OP_PUSH; - WT_STAT_CONN_INCR(session, cursor_remove); WT_STAT_DATA_INCR(session, cursor_remove); WT_STAT_DATA_INCRV(session, cursor_remove_bytes, cursor->key.size); + __cursor_state_save(cursor, &state); + /* * WT_CURSOR.remove has a unique semantic, the cursor stays positioned * if it starts positioned, otherwise clear the cursor on completion. */ positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT); -retry: /* - * If removing with overwrite configured, and positioned to an on-page - * key, the update doesn't require another search. The cursor won't be - * positioned on a page with an external key set, but be sure. + * If remove positioned to an on-page key, the remove doesn't require + * another search. We don't care about the "overwrite" configuration + * because regardless of the overwrite setting, any existing record is + * removed, and the record must exist with a positioned cursor. The + * cursor won't be positioned on a page with an external key set, but + * be sure. */ - if (__cursor_page_pinned(cbt) && - F_ISSET_ALL(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_OVERWRITE)) { + if (__cursor_page_pinned(cbt) && F_ISSET(cursor, WT_CURSTD_KEY_INT)) { WT_ERR(__wt_txn_autocommit_check(session)); /* @@ -773,6 +893,8 @@ retry: ret = btree->type == BTREE_ROW ? __cursor_row_modify(session, cbt, true) : __cursor_col_modify(session, cbt, true); + if (ret == 0) + goto done; /* * The pinned page goes away if we fail for any reason, make @@ -780,12 +902,9 @@ retry: * use the pinned page, but that's an unlikely path.) 
Re-save * the cursor state: we may retry but eventually fail. */ - if (ret != 0) { - WT_TRET(__cursor_copy_int_key(cursor)); - WT_CURFILE_OP_PUSH; - goto err; - } - goto done; + WT_TRET(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + goto err; } /* @@ -794,9 +913,9 @@ retry: * eventually fail. */ WT_ERR(__cursor_copy_int_key(cursor)); - WT_CURFILE_OP_PUSH; + __cursor_state_save(cursor, &state); - WT_ERR(__cursor_func_init(cbt, true)); +retry: WT_ERR(__cursor_func_init(cbt, true)); if (btree->type == BTREE_ROW) { WT_ERR(__cursor_row_search(session, cbt, NULL, false)); @@ -857,14 +976,12 @@ done: /* */ if (ret == 0) F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); - if (ret == 0 && positioned) { + if (ret == 0 && positioned) WT_TRET(__wt_key_return(session, cbt)); - if (ret == 0) - F_SET(cursor, WT_CURSTD_KEY_INT); - } else + else WT_TRET(__cursor_reset(cbt)); if (ret != 0) - WT_CURFILE_OP_POP; + __cursor_state_restore(cursor, &state); return (ret); } @@ -877,6 +994,7 @@ int __wt_btcur_update(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; @@ -889,6 +1007,8 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_STAT_DATA_INCR(session, cursor_update); WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->value.size); + __cursor_state_save(cursor, &state); + if (btree->type == BTREE_ROW) WT_RET(__cursor_size_chk(session, &cursor->key)); WT_RET(__cursor_size_chk(session, &cursor->value)); @@ -896,7 +1016,48 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt) /* It's no longer possible to bulk-load into the tree. */ __cursor_disable_bulk(session, btree); -retry: WT_RET(__cursor_func_init(cbt, true)); + /* + * If update positioned to an on-page key, the update doesn't require + * another search. We don't care about the "overwrite" configuration + * because regardless of the overwrite setting, any existing record is + * updated, and the record must exist with a positioned cursor. 
The + * cursor won't be positioned on a page with an external key set, but + * be sure. + */ + if (__cursor_page_pinned(cbt) && F_ISSET(cursor, WT_CURSTD_KEY_INT)) { + WT_ERR(__wt_txn_autocommit_check(session)); + /* + * The cursor position may not be exact (the cursor's comparison + * value not equal to zero). Correct to an exact match so we can + * update whatever we're pointing at. + */ + cbt->compare = 0; + ret = btree->type == BTREE_ROW ? + __cursor_row_modify(session, cbt, false) : + __cursor_col_modify(session, cbt, false); + if (ret == 0) + goto done; + + /* + * The pinned page goes away if we fail for any reason, make + * sure there's a local copy of any key. (Restart could still + * use the pinned page, but that's an unlikely path.) Re-save + * the cursor state: we may retry but eventually fail. + */ + WT_TRET(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + goto err; + } + + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key. Re-save the cursor state: we may retry but + * eventually fail. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + +retry: WT_ERR(__cursor_func_init(cbt, true)); if (btree->type == BTREE_ROW) { WT_ERR(__cursor_row_search(session, cbt, NULL, true)); @@ -945,11 +1106,14 @@ err: if (ret == WT_RESTART) { * To make this work, we add a field to the btree cursor to pass back a * pointer to the modify function's allocated update structure. */ - if (ret == 0) +done: if (ret == 0) WT_TRET(__wt_kv_return(session, cbt, cbt->modify_update)); - if (ret != 0) + if (ret != 0) { WT_TRET(__cursor_reset(cbt)); + __cursor_state_restore(cursor, &state); + } + return (ret); } @@ -1097,14 +1261,6 @@ __cursor_truncate(WT_SESSION_IMPL *session, * and we can proceed without concern. 
*/ retry: WT_RET(__wt_btcur_search(start)); - - /* - * XXX KEITH - * When the btree cursor code sets/clears the cursor flags (rather than - * the cursor layer), the set/clear goes away, only the assert remains. - */ - F_CLR((WT_CURSOR *)start, WT_CURSTD_KEY_SET); - F_SET((WT_CURSOR *)start, WT_CURSTD_KEY_INT); WT_ASSERT(session, F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); @@ -1161,14 +1317,6 @@ __cursor_truncate_fix(WT_SESSION_IMPL *session, * refresh the page's modification information. */ retry: WT_RET(__wt_btcur_search(start)); - - /* - * XXX KEITH - * When the btree cursor code sets/clears the cursor flags (rather than - * the cursor layer), the set/clear goes away, only the assert remains. - */ - F_CLR((WT_CURSOR *)start, WT_CURSTD_KEY_SET); - F_SET((WT_CURSOR *)start, WT_CURSTD_KEY_INT); WT_ASSERT(session, F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); diff --git a/src/btree/bt_random.c b/src/btree/bt_random.c index 25ede0a09ac..c5948ec4ab5 100644 --- a/src/btree/bt_random.c +++ b/src/btree/bt_random.c @@ -292,14 +292,16 @@ int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; WT_UPDATE *upd; wt_off_t size; uint64_t n, skip; - session = (WT_SESSION_IMPL *)cbt->iface.session; btree = cbt->btree; + cursor = &cbt->iface; + session = (WT_SESSION_IMPL *)cbt->iface.session; /* * Only supports row-store: applications can trivially select a random @@ -312,6 +314,8 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_STAT_CONN_INCR(session, cursor_next); WT_STAT_DATA_INCR(session, cursor_next); + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + #ifdef HAVE_DIAGNOSTIC /* * Under some conditions we end up using the underlying cursor.next to @@ -320,7 +324,6 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) */ __wt_cursor_key_order_reset(cbt); #endif - /* * If we don't have a current position in the tree, or if retrieving * random values without 
sampling, pick a roughly random leaf page in diff --git a/src/btree/bt_ret.c b/src/btree/bt_ret.c index 9fc457e2297..f17fa1b85d1 100644 --- a/src/btree/bt_ret.c +++ b/src/btree/bt_ret.c @@ -142,8 +142,20 @@ __value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) int __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) { - WT_RET(__key_return(session, cbt)); + WT_CURSOR *cursor; + + cursor = &cbt->iface; + /* + * We may already have an internal key, in which case the cursor may + * not be set up to get another copy (for example, when we rely on a + * search-function result). + */ + F_CLR(cursor, WT_CURSTD_KEY_EXT); + if (!F_ISSET(cursor, WT_CURSTD_KEY_INT)) { + WT_RET(__key_return(session, cbt)); + F_SET(cursor, WT_CURSTD_KEY_INT); + } return (0); } @@ -154,8 +166,15 @@ __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) { + WT_CURSOR *cursor; + + cursor = &cbt->iface; + WT_RET(__wt_key_return(session, cbt)); + + F_CLR(cursor, WT_CURSTD_VALUE_EXT); WT_RET(__value_return(session, cbt, upd)); + F_SET(cursor, WT_CURSTD_VALUE_INT); return (0); } diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index 274dc1e8f62..205afb607c3 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -8,29 +8,6 @@ #include "wt_internal.h" -/* - * WT_BTREE_CURSOR_SAVE_AND_RESTORE - * Save the cursor's key/value data/size fields, call an underlying btree - * function, and then consistently handle failure and success. 
- */ -#define WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, f, ret) do { \ - WT_ITEM __key_copy = (cursor)->key; \ - uint64_t __recno = (cursor)->recno; \ - WT_ITEM __value_copy = (cursor)->value; \ - if (((ret) = (f)) == 0) { \ - F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT); \ - F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); \ - } else { \ - if (F_ISSET(cursor, WT_CURSTD_KEY_EXT)) { \ - (cursor)->recno = __recno; \ - WT_ITEM_SET((cursor)->key, __key_copy); \ - } \ - if (F_ISSET(cursor, WT_CURSTD_VALUE_EXT)) \ - WT_ITEM_SET((cursor)->value, __value_copy); \ - F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); \ - } \ -} while (0) - /* * __curfile_compare -- * WT_CURSOR->compare method for the btree cursor type. @@ -109,9 +86,12 @@ __curfile_next(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, next, cbt->btree); - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); - if ((ret = __wt_btcur_next(cbt, false)) == 0) - F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + WT_ERR(__wt_btcur_next(cbt, false)); + + /* Next maintains a position, key and value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: API_END_RET(session, ret); } @@ -131,9 +111,12 @@ __wt_curfile_next_random(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, next, cbt->btree); - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); - if ((ret = __wt_btcur_next_random(cbt)) == 0) - F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + WT_ERR(__wt_btcur_next_random(cbt)); + + /* Next-random maintains a position, key and value. 
*/ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: API_END_RET(session, ret); } @@ -152,9 +135,12 @@ __curfile_prev(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, prev, cbt->btree); - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); - if ((ret = __wt_btcur_prev(cbt, false)) == 0) - F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + WT_ERR(__wt_btcur_prev(cbt, false)); + + /* Prev maintains a position, key and value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: API_END_RET(session, ret); } @@ -175,7 +161,10 @@ __curfile_reset(WT_CURSOR *cursor) ret = __wt_btcur_reset(cbt); - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + /* Reset maintains no position, key or value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == 0 && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0); err: API_END_RET(session, ret); } @@ -194,10 +183,15 @@ __curfile_search(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, search, cbt->btree); - WT_CURSOR_NEEDKEY(cursor); + WT_CURSOR_CHECKKEY(cursor); WT_CURSOR_NOVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, __wt_btcur_search(cbt), ret); + WT_ERR(__wt_btcur_search(cbt)); + + /* Search maintains a position, key and value. 
*/ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: API_END_RET(session, ret); } @@ -216,11 +210,15 @@ __curfile_search_near(WT_CURSOR *cursor, int *exact) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, search_near, cbt->btree); - WT_CURSOR_NEEDKEY(cursor); + WT_CURSOR_CHECKKEY(cursor); WT_CURSOR_NOVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE( - cursor, __wt_btcur_search_near(cbt, exact), ret); + WT_ERR(__wt_btcur_search_near(cbt, exact)); + + /* Search-near maintains a position, key and value. */ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: API_END_RET(session, ret); } @@ -238,38 +236,33 @@ __curfile_insert(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, insert, cbt->btree); + if (!F_ISSET(cursor, WT_CURSTD_APPEND)) - WT_CURSOR_NEEDKEY(cursor); - WT_CURSOR_NEEDVALUE(cursor); + WT_CURSOR_CHECKKEY(cursor); + WT_CURSOR_CHECKVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, __wt_btcur_insert(cbt), ret); + WT_ERR(__wt_btcur_insert(cbt)); /* - * Insert is the one cursor operation that doesn't end with the cursor - * pointing to an on-page item (except for column-store appends, where - * we are returning a key). That is, the application's cursor continues - * to reference the application's memory after a successful cursor call, - * which isn't true anywhere else. We don't want to have to explain that - * scoping corner case, so we reset the application's cursor so it can - * free the referenced memory and continue on without risking subsequent - * core dumps. + * Insert maintains no position, key or value (except for column-store + * appends, where we are returning a key). 
*/ - if (ret == 0) { - if (!F_ISSET(cursor, WT_CURSTD_APPEND)) - F_CLR(cursor, WT_CURSTD_KEY_INT); - F_CLR(cursor, WT_CURSTD_VALUE_INT); - } + WT_ASSERT(session, + (F_ISSET(cursor, WT_CURSTD_APPEND) && + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT) || + (!F_ISSET(cursor, WT_CURSTD_APPEND) && + F_MASK(cursor, WT_CURSTD_KEY_SET) == 0)); err: CURSOR_UPDATE_API_END(session, ret); return (ret); } /* - * __curfile_update -- - * WT_CURSOR->update method for the btree cursor type. + * __wt_curfile_insert_check -- + * WT_CURSOR->insert_check method for the btree cursor type. */ -static int -__curfile_update(WT_CURSOR *cursor) +int +__wt_curfile_insert_check(WT_CURSOR *cursor) { WT_CURSOR_BTREE *cbt; WT_DECL_RET; @@ -278,21 +271,21 @@ __curfile_update(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, update, cbt->btree); - WT_CURSOR_NEEDKEY(cursor); - WT_CURSOR_NEEDVALUE(cursor); + WT_CURSOR_CHECKKEY(cursor); + WT_CURSOR_NOVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, __wt_btcur_update(cbt), ret); + ret = __wt_btcur_insert_check(cbt); err: CURSOR_UPDATE_API_END(session, ret); return (ret); } /* - * __wt_curfile_update_check -- - * WT_CURSOR->update_check method for the btree cursor type. + * __curfile_update -- + * WT_CURSOR->update method for the btree cursor type. */ -int -__wt_curfile_update_check(WT_CURSOR *cursor) +static int +__curfile_update(WT_CURSOR *cursor) { WT_CURSOR_BTREE *cbt; WT_DECL_RET; @@ -301,11 +294,15 @@ __wt_curfile_update_check(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, update, cbt->btree); - WT_CURSOR_NEEDKEY(cursor); - WT_CURSOR_NOVALUE(cursor); + WT_CURSOR_CHECKKEY(cursor); + WT_CURSOR_CHECKVALUE(cursor); - WT_BTREE_CURSOR_SAVE_AND_RESTORE( - cursor, __wt_btcur_update_check(cbt), ret); + WT_ERR(__wt_btcur_update(cbt)); + + /* Update maintains a position, key and value. 
*/ + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT && + F_MASK(cursor, WT_CURSTD_VALUE_SET) == WT_CURSTD_VALUE_INT); err: CURSOR_UPDATE_API_END(session, ret); return (ret); diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 8df8e201173..6135132601b 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -974,8 +974,8 @@ __curjoin_init_next(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, if (!iterable && F_ISSET(je, WT_CURJOIN_ENTRY_BLOOM)) { if (session->txn.isolation == WT_ISO_READ_UNCOMMITTED) WT_ERR_MSG(session, EINVAL, - "join cursors with Bloom filters cannot be " - "used with read-uncommitted isolation"); + "join cursors with Bloom filters cannot be " + "used with read-uncommitted isolation"); if (je->bloom == NULL) { /* * Look for compatible filters to be shared, diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 84c9990832d..a957d245958 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -987,7 +987,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) if (conn->evict_tune_num_points >= conn->evict_tune_datapts_needed) { if ((conn->evict_tune_workers_best == conn->evict_threads.current_threads) && - (conn->evict_threads.current_threads < + (conn->evict_threads.current_threads < conn->evict_threads_max)) { /* * Keep adding workers. We will check again @@ -996,7 +996,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_tune_datapts_needed += WT_MIN(EVICT_TUNE_DATAPT_MIN, (conn->evict_threads_max - - conn->evict_threads.current_threads)/ + - conn->evict_threads.current_threads) / EVICT_TUNE_BATCH); } else { /* diff --git a/src/include/cursor.i b/src/include/cursor.i index 9cb9f5e7189..12044e0e228 100644 --- a/src/include/cursor.i +++ b/src/include/cursor.i @@ -75,23 +75,6 @@ __cursor_leave(WT_SESSION_IMPL *session) __wt_txn_read_last(session); } -/* - * __curfile_enter -- - * Activate a file cursor. 
- */ -static inline int -__curfile_enter(WT_CURSOR_BTREE *cbt) -{ - WT_SESSION_IMPL *session; - - session = (WT_SESSION_IMPL *)cbt->iface.session; - - if (!F_ISSET(cbt, WT_CBT_NO_TXN)) - WT_RET(__cursor_enter(session)); - F_SET(cbt, WT_CBT_ACTIVE); - return (0); -} - /* * __cursor_reset -- * Reset the cursor, it no longer holds any position. @@ -264,8 +247,12 @@ __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter) /* If the transaction is idle, check that the cache isn't full. */ WT_RET(__wt_txn_idle_cache_check(session)); - if (!F_ISSET(cbt, WT_CBT_ACTIVE)) - WT_RET(__curfile_enter(cbt)); + /* Activate the file cursor. */ + if (!F_ISSET(cbt, WT_CBT_ACTIVE)) { + if (!F_ISSET(cbt, WT_CBT_NO_TXN)) + WT_RET(__cursor_enter(session)); + F_SET(cbt, WT_CBT_ACTIVE); + } /* * If this is an ordinary transactional cursor, make sure we are set up diff --git a/src/include/extern.h b/src/include/extern.h index c0aa21b7f4c..a7eb4b491a9 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -103,7 +103,7 @@ extern int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((wa extern int __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_update_check(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_remove(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -289,7 +289,7 @@ extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const extern int __wt_curds_open( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curfile_next_random(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curfile_update_check(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_curfile_insert_check(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curjoin_joined(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/packing.i b/src/include/packing.i index d79afe6d4a2..6b4bcd49e04 100644 --- a/src/include/packing.i +++ b/src/include/packing.i @@ -198,7 +198,7 @@ next: if (pack->cur == pack->end) return (0); default: WT_RET_MSG(pack->session, EINVAL, - "Invalid type '%c' found in format '%.*s'", + "Invalid type '%c' found in format '%.*s'", pv->type, (int)(pack->end - pack->orig), pack->orig); } diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 7223aeae0f6..558e93d3de0 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1480,6 +1480,10 @@ struct __wt_session { * contains. * @snippet ex_all.c Truncate a range * + * Any specified cursors end with no position, and subsequent calls to + * the WT_CURSOR::next (WT_CURSOR::prev) method will iterate from the + * beginning (end) of the table. + * * @param session the session handle * @param name the URI of the file or table to truncate * @param start optional cursor marking the first record discarded; diff --git a/src/log/log.c b/src/log/log.c index 05234619d32..1a27120710b 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -783,8 +783,8 @@ __log_openfile(WT_SESSION_IMPL *session, __wt_log_desc_byteswap(desc); if (desc->log_magic != WT_LOG_MAGIC) WT_PANIC_RET(session, WT_ERROR, - "log file %s corrupted: Bad magic number %" PRIu32, - (*fhp)->name, desc->log_magic); + "log file %s corrupted: Bad magic number %" PRIu32, + (*fhp)->name, desc->log_magic); if (desc->majorv > WT_LOG_MAJOR_VERSION || (desc->majorv == WT_LOG_MAJOR_VERSION && desc->minorv > WT_LOG_MINOR_VERSION)) diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 3f0b6df8eb0..0de39b38370 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -178,20 +178,12 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update) if (reset) { WT_ASSERT(session, !F_ISSET(&clsm->iface, - WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT)); + WT_CURSTD_KEY_INT | 
WT_CURSTD_VALUE_INT)); WT_RET(__clsm_reset_cursors(clsm, NULL)); } for (;;) { - /* - * If the cursor looks up-to-date, check if the cache is full. - * In case this call blocks, the check will be repeated before - * proceeding. - */ - if (clsm->dsk_gen != lsm_tree->dsk_gen && - lsm_tree->nchunks != 0) - goto open; - + /* Check if the cursor looks up-to-date. */ if (clsm->dsk_gen != lsm_tree->dsk_gen && lsm_tree->nchunks != 0) goto open; @@ -666,7 +658,7 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) { */ if (i != nchunks - 1) clsm->chunks[i]->cursor->insert = - __wt_curfile_update_check; + __wt_curfile_insert_check; if (!F_ISSET(clsm, WT_CLSM_MERGE) && F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) @@ -852,8 +844,8 @@ __clsm_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) WT_ERR_MSG(session, EINVAL, "comparison method cursors must reference the same object"); - WT_CURSOR_NEEDKEY(a); - WT_CURSOR_NEEDKEY(b); + WT_CURSOR_CHECKKEY(a); + WT_CURSOR_CHECKKEY(b); WT_ERR(__wt_compare( session, alsm->lsm_tree->collator, &a->key, &b->key, cmpp)); @@ -1529,7 +1521,7 @@ __clsm_insert(WT_CURSOR *cursor) clsm = (WT_CURSOR_LSM *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, insert, NULL); - WT_CURSOR_NEEDKEY(cursor); + WT_CURSOR_CHECKKEY(cursor); WT_CURSOR_NEEDVALUE(cursor); WT_ERR(__clsm_enter(clsm, false, true)); @@ -1573,7 +1565,7 @@ __clsm_update(WT_CURSOR *cursor) clsm = (WT_CURSOR_LSM *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, update, NULL); - WT_CURSOR_NEEDKEY(cursor); + WT_CURSOR_CHECKKEY(cursor); WT_CURSOR_NEEDVALUE(cursor); WT_ERR(__clsm_enter(clsm, false, true)); @@ -1620,16 +1612,14 @@ __clsm_remove(WT_CURSOR *cursor) positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT); CURSOR_REMOVE_API_CALL(cursor, session, NULL); - WT_CURSOR_NEEDKEY(cursor); + WT_CURSOR_CHECKKEY(cursor); WT_CURSOR_NOVALUE(cursor); WT_ERR(__clsm_enter(clsm, false, true)); - if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) || - (ret = __clsm_lookup(clsm, &value)) == 0) - ret = __clsm_put( - session, clsm, &cursor->key, 
&__tombstone, positioned); - -err: __clsm_leave(clsm); + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) + WT_ERR(__clsm_lookup(clsm, &value)); + WT_ERR(__clsm_put( + session, clsm, &cursor->key, &__tombstone, positioned)); /* * If the cursor was positioned, it stays positioned with a key but no @@ -1643,6 +1633,7 @@ err: __clsm_leave(clsm); else WT_TRET(cursor->reset(cursor)); +err: __clsm_leave(clsm); CURSOR_UPDATE_API_END(session, ret); return (ret); } diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index a06b736bf0a..8838638f388 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -625,7 +625,7 @@ err: if (locked) else __wt_verbose(session, WT_VERB_LSM, "Merge failed with %s", - __wt_strerror(session, ret, NULL, 0)); + __wt_strerror(session, ret, NULL, 0)); } F_CLR(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION); return (ret); diff --git a/src/lsm/lsm_meta.c b/src/lsm/lsm_meta.c index 46ead6d6ac4..fc4dde82470 100644 --- a/src/lsm/lsm_meta.c +++ b/src/lsm/lsm_meta.c @@ -229,7 +229,7 @@ __lsm_meta_read_v1( cv.len -= 2; } WT_ERR(__wt_config_check(session, - WT_CONFIG_REF(session, WT_SESSION_create), cv.str, cv.len)); + WT_CONFIG_REF(session, WT_SESSION_create), cv.str, cv.len)); WT_ERR(__wt_strndup(session, cv.str, cv.len, &lsm_tree->bloom_config)); WT_ERR(__wt_config_getones( session, lsmconf, "lsm.bloom_hash_count", &cv)); diff --git a/src/lsm/lsm_stat.c b/src/lsm/lsm_stat.c index 21e8991be94..ed760b6d5f3 100644 --- a/src/lsm/lsm_stat.c +++ b/src/lsm/lsm_stat.c @@ -29,8 +29,8 @@ __curstat_lsm_init( const char *cfg[] = { WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL, NULL }; const char *disk_cfg[] = { - WT_CONFIG_BASE(session, WT_SESSION_open_cursor), - "checkpoint=" WT_CHECKPOINT, NULL, NULL }; + WT_CONFIG_BASE(session, WT_SESSION_open_cursor), + "checkpoint=" WT_CHECKPOINT, NULL, NULL }; locked = false; WT_RET(__wt_lsm_tree_get(session, uri, false, &lsm_tree)); diff --git a/src/schema/schema_create.c b/src/schema/schema_create.c index 
020d5e72c13..a77ca51f9d2 100644 --- a/src/schema/schema_create.c +++ b/src/schema/schema_create.c @@ -35,7 +35,7 @@ __wt_direct_io_size_check(WT_SESSION_IMPL *session, * units of its happy place. */ if (FLD_ISSET(conn->direct_io, - WT_DIRECT_IO_CHECKPOINT | WT_DIRECT_IO_DATA)) { + WT_DIRECT_IO_CHECKPOINT | WT_DIRECT_IO_DATA)) { align = (int64_t)conn->buffer_alignment; if (align != 0 && (cval.val < align || cval.val % align != 0)) WT_RET_MSG(session, EINVAL, diff --git a/src/schema/schema_worker.c b/src/schema/schema_worker.c index e5f71b5d56f..62cdd7d367b 100644 --- a/src/schema/schema_worker.c +++ b/src/schema/schema_worker.c @@ -112,10 +112,10 @@ __wt_schema_worker(WT_SESSION_IMPL *session, wt_session = (WT_SESSION *)session; if (file_func == __wt_salvage && dsrc->salvage != NULL) WT_ERR(dsrc->salvage( - dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg)); + dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg)); else if (file_func == __wt_verify && dsrc->verify != NULL) WT_ERR(dsrc->verify( - dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg)); + dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg)); else if (file_func == __wt_checkpoint) ; else if (file_func == __wt_checkpoint_get_handles) diff --git a/src/session/session_api.c b/src/session/session_api.c index 3d13287fbe6..51233e5e224 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -1206,10 +1206,15 @@ __wt_session_range_truncate(WT_SESSION_IMPL *session, done: err: /* - * Close any locally-opened start cursor. + * Close any locally-opened start cursor. Reset application cursors, + * they've possibly moved and the application cannot use them. 
*/ if (local_start) WT_TRET(start->close(start)); + else + WT_TRET(start->reset(start)); + if (stop != NULL) + WT_TRET(stop->reset(stop)); return (ret); } diff --git a/src/session/session_compact.c b/src/session/session_compact.c index 85214ae6d98..72c072e0fb8 100644 --- a/src/session/session_compact.c +++ b/src/session/session_compact.c @@ -210,7 +210,7 @@ __compact_checkpoint(WT_SESSION_IMPL *session) * work we need to have done is done in the underlying block manager. */ const char *checkpoint_cfg[] = { - WT_CONFIG_BASE(session, WT_SESSION_checkpoint), "force=1", NULL }; + WT_CONFIG_BASE(session, WT_SESSION_checkpoint), "force=1", NULL }; /* Checkpoints take a lot of time, check if we've run out. */ WT_RET(__wt_session_compact_check_timeout(session)); diff --git a/src/txn/txn.c b/src/txn/txn.c index e5e59c2b901..6eebf5ecf9f 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -713,7 +713,7 @@ __wt_txn_stats_update(WT_SESSION_IMPL *session) snapshot_pinned = txn_global->nsnap_oldest_id; WT_STAT_SET(session, stats, txn_pinned_range, - txn_global->current - txn_global->oldest_id); + txn_global->current - txn_global->oldest_id); WT_STAT_SET(session, stats, txn_pinned_snapshot_range, snapshot_pinned == WT_TXN_NONE ? 
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 6c97922f7e1..5ec8aa19e4c 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -306,7 +306,7 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) WT_ASSERT(session, !F_ISSET(&session->txn, WT_TXN_ERROR)); WT_RET(__wt_metadata_cursor(session, &meta_cursor)); meta_cursor->set_key(meta_cursor, session->dhandle->name); - ret = __wt_curfile_update_check(meta_cursor); + ret = __wt_curfile_insert_check(meta_cursor); if (ret == WT_ROLLBACK) { metadata_race = true; ret = 0; diff --git a/test/format/config.c b/test/format/config.c index cd9856d641e..535dcd677e2 100644 --- a/test/format/config.c +++ b/test/format/config.c @@ -63,39 +63,42 @@ config_setup(void) config_in_memory(); /* - * Choose a data source type and a file type: they're interrelated (LSM - * trees are only compatible with row-store) and other items depend on - * them. + * Choose a file format and a data source: they're interrelated (LSM is + * only compatible with row-store) and other items depend on them. */ + if (!config_is_perm("file_type")) { + if (config_is_perm("data_source") && DATASOURCE("lsm")) + config_single("file_type=row", 0); + else + switch (mmrand(NULL, 1, 10)) { + case 1: /* 10% */ + config_single("file_type=fix", 0); + break; + case 2: case 3: case 4: /* 30% */ + config_single("file_type=var", 0); + break; /* 60% */ + case 5: case 6: case 7: case 8: case 9: case 10: + config_single("file_type=row", 0); + break; + } + } + config_map_file_type(g.c_file_type, &g.type); + if (!config_is_perm("data_source")) switch (mmrand(NULL, 1, 3)) { case 1: config_single("data_source=file", 0); break; case 2: - if (!g.c_in_memory) { - config_single("data_source=lsm", 0); - break; - } - /* FALLTHROUGH */ - case 3: config_single("data_source=table", 0); break; - } - - if (!config_is_perm("file_type")) - switch (DATASOURCE("lsm") ? 
5 : mmrand(NULL, 1, 10)) { - case 1: - config_single("file_type=fix", 0); - break; - case 2: case 3: case 4: - config_single("file_type=var", 0); - break; - case 5: case 6: case 7: case 8: case 9: case 10: - config_single("file_type=row", 0); + case 3: + if (g.c_in_memory || g.type != ROW) + config_single("data_source=table", 0); + else + config_single("data_source=lsm", 0); break; } - config_map_file_type(g.c_file_type, &g.type); /* * If data_source and file_type were both "permanent", we may still diff --git a/test/format/ops.c b/test/format/ops.c index 1013d1da30b..05457ebb5a0 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -28,14 +28,17 @@ #include "format.h" -static int col_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); -static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t); -static int col_update(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +static int col_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); +static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool); +static int col_update( + TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static int nextprev(WT_CURSOR *, int); static void *ops(void *); -static int row_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); -static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t); -static int row_update(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +static int row_insert( + TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); +static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool); +static int row_update( + TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static void table_append_init(void); #ifdef HAVE_BERKELEY_DB @@ -243,6 +246,9 @@ typedef struct { bool insert; /* Insert operation */ } SNAP_OPS; +#define SNAP_TRACK \ + (snap != NULL && (size_t)(snap - snap_list) < WT_ELEMENTS(snap_list)) + /* * snap_track -- * Add a single snapshot isolation returned value to the list. 
@@ -395,15 +401,16 @@ snap_check(WT_CURSOR *cursor, static void * ops(void *arg) { + enum { INSERT, READ, REMOVE, UPDATE } op; SNAP_OPS *snap, snap_list[64]; TINFO *tinfo; WT_CONNECTION *conn; - WT_CURSOR *cursor, *cursor_insert; + WT_CURSOR *cursor; WT_DECL_RET; WT_ITEM *key, _key, *value, _value; WT_SESSION *session; uint64_t keyno, ckpt_op, reset_op, session_op; - uint32_t op, rnd; + uint32_t rnd; u_int i; int dir; char *ckpt_config, ckpt_name[64]; @@ -429,9 +436,9 @@ ops(void *arg) val_gen_setup(&tinfo->rnd, value); /* Set the first operation where we'll create sessions and cursors. */ - session_op = 0; + cursor = NULL; session = NULL; - cursor = cursor_insert = NULL; + session_op = 0; /* Set the first operation where we'll perform checkpoint operations. */ ckpt_op = g.c_checkpoints ? mmrand(&tinfo->rnd, 100, 10000) : 0; @@ -485,24 +492,11 @@ ops(void *arg) readonly = true; } else { /* - * Open two cursors: one for overwriting and one - * for append (if it's a column-store). - * - * The reason is when testing with existing - * records, we don't track if a record was - * deleted or not, which means we must use - * cursor->insert with overwriting configured. - * But, in column-store files where we're - * testing with new, appended records, we don't - * want to have to specify the record number, - * which requires an append configuration. + * Configure "append", in the case of column + * stores, we append when inserting new rows. */ - testutil_check(session->open_cursor(session, - g.uri, NULL, "overwrite", &cursor)); - if (g.type == FIX || g.type == VAR) - testutil_check(session->open_cursor( - session, g.uri, - NULL, "append", &cursor_insert)); + testutil_check(session->open_cursor( + session, g.uri, NULL, "append", &cursor)); /* Pick the next session/cursor close/open. */ session_op += mmrand(&tinfo->rnd, 100, 5000); @@ -600,111 +594,174 @@ skip_checkpoint: /* Pick the next checkpoint operation. */ intxn = true; } + /* Select a row. 
*/ keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows); positioned = false; + /* Select an operation. */ + op = READ; + if (!readonly) { + i = mmrand(&tinfo->rnd, 1, 100); + if (i < g.c_delete_pct) + op = REMOVE; + else if (i < g.c_delete_pct + g.c_insert_pct) + op = INSERT; + else if (i < + g.c_delete_pct + g.c_insert_pct + g.c_write_pct) + op = UPDATE; + else + op = READ; + } + /* - * Perform some number of operations: the percentage of deletes, - * inserts and writes are specified, reads are the rest. The - * percentages don't have to add up to 100, a high percentage - * of deletes will mean fewer inserts and writes. Modifications - * are always followed by a read to confirm it worked. + * Inserts, removes and updates can be done following a cursor + * set-key, or based on a cursor position taken from a previous + * search. If not already doing a read, position the cursor at + * an existing point in the tree 20% of the time. */ - op = readonly ? UINT32_MAX : mmrand(&tinfo->rnd, 1, 100); - if (op < g.c_delete_pct) { - ++tinfo->remove; + positioned = false; + if (op != READ && mmrand(&tinfo->rnd, 1, 5) == 1) { + ++tinfo->search; + ret = read_row(cursor, key, value, keyno); + if (ret == 0) { + positioned = true; + if (SNAP_TRACK) + snap_track(snap++, keyno, NULL, value); + } else { + positioned = false; + if (ret == WT_ROLLBACK && intxn) + goto deadlock; + testutil_assert(ret == WT_NOTFOUND); + } + } +#if 0 + /* Optionally reserve a row. 
*/ + if (!readonly && intxn && mmrand(&tinfo->rnd, 0, 20) == 1) { switch (g.type) { case ROW: - ret = row_remove(cursor, key, keyno); + ret = + row_reserve(cursor, key, keyno, positioned); break; case FIX: case VAR: - ret = col_remove(cursor, key, keyno); + ret = col_reserve(cursor, keyno, positioned); break; } if (ret == 0) { positioned = true; - if (snap != NULL && (size_t) - (snap - snap_list) < WT_ELEMENTS(snap_list)) + __wt_yield(); + } else { + positioned = false; + if (ret == WT_ROLLBACK && intxn) + goto deadlock; + testutil_assert(ret == WT_NOTFOUND); + } + } +#endif + /* Perform the operation. */ + switch (op) { + case REMOVE: + switch (g.type) { + case ROW: + ret = + row_remove(cursor, key, keyno, positioned); + break; + case FIX: + case VAR: + ret = + col_remove(cursor, key, keyno, positioned); + break; + } + if (ret == 0) { + ++tinfo->remove; + /* + * Don't set positioned: it's unchanged from the + * previous state, but not necessarily set. + */ + if (SNAP_TRACK) snap_track(snap++, keyno, NULL, NULL); } else { positioned = false; if (ret == WT_ROLLBACK && intxn) goto deadlock; + testutil_assert(ret == WT_NOTFOUND); } - } else if (op < g.c_delete_pct + g.c_insert_pct) { - ++tinfo->insert; + break; + case INSERT: switch (g.type) { case ROW: - key_gen_insert(&tinfo->rnd, key, keyno); - val_gen(&tinfo->rnd, value, keyno); - ret = row_insert(cursor, key, value, keyno); + ret = row_insert(tinfo, + cursor, key, value, keyno, positioned); break; case FIX: case VAR: /* - * We can only append so many new records, if - * we've reached that limit, update a record - * instead of doing an insert. + * We can only append so many new records, once + * we reach that limit, update a record instead + * of inserting. */ if (g.append_cnt >= g.append_max) - goto skip_insert; + goto update_instead_of_insert; - /* Insert, then reset the insert cursor. 
*/ - val_gen(&tinfo->rnd, value, g.rows + 1); ret = col_insert( - cursor_insert, key, value, &keyno); - testutil_check( - cursor_insert->reset(cursor_insert)); + tinfo, cursor, key, value, &keyno); break; } + + /* Insert never leaves the cursor positioned. */ positioned = false; if (ret == 0) { - if (snap != NULL && (size_t) - (snap - snap_list) < WT_ELEMENTS(snap_list)) + ++tinfo->insert; + if (SNAP_TRACK) snap_track(snap++, keyno, g.type == ROW ? key : NULL, value); - } else + } else { if (ret == WT_ROLLBACK && intxn) goto deadlock; - } else if ( - op < g.c_delete_pct + g.c_insert_pct + g.c_write_pct) { + testutil_assert(ret == 0); + } + break; + case UPDATE: +update_instead_of_insert: ++tinfo->update; + + /* Update the row. */ switch (g.type) { case ROW: - key_gen(key, keyno); - val_gen(&tinfo->rnd, value, keyno); - ret = row_update(cursor, key, value, keyno); + ret = row_update(tinfo, + cursor, key, value, keyno, positioned); break; case FIX: case VAR: -skip_insert: val_gen(&tinfo->rnd, value, keyno); - ret = col_update(cursor, key, value, keyno); + ret = col_update(tinfo, + cursor, key, value, keyno, positioned); break; } if (ret == 0) { positioned = true; - if (snap != NULL && (size_t) - (snap - snap_list) < WT_ELEMENTS(snap_list)) + if (SNAP_TRACK) snap_track(snap++, keyno, NULL, value); } else { positioned = false; if (ret == WT_ROLLBACK && intxn) goto deadlock; + testutil_assert(ret == 0); } - } else { + break; + case READ: ++tinfo->search; ret = read_row(cursor, key, value, keyno); if (ret == 0) { positioned = true; - if (snap != NULL && (size_t) - (snap - snap_list) < WT_ELEMENTS(snap_list)) + if (SNAP_TRACK) snap_track(snap++, keyno, NULL, value); } else { positioned = false; if (ret == WT_ROLLBACK && intxn) goto deadlock; + testutil_assert(ret == WT_NOTFOUND); } + break; } /* @@ -727,8 +784,8 @@ skip_insert: val_gen(&tinfo->rnd, value, keyno); testutil_check(cursor->reset(cursor)); /* - * If we're in a transaction, commit 40% of the time and - * 
rollback 10% of the time. + * Continue if not in a transaction, else add more operations + * to the transaction half the time. */ if (!intxn || (rnd = mmrand(&tinfo->rnd, 1, 10)) > 5) continue; @@ -741,6 +798,10 @@ skip_insert: val_gen(&tinfo->rnd, value, keyno); cursor, snap_list, snap, key, value)) == WT_ROLLBACK) goto deadlock; + /* + * If we're in a transaction, commit 40% of the time and + * rollback 10% of the time. + */ switch (rnd) { case 1: case 2: case 3: case 4: /* 40% */ testutil_check( @@ -1040,27 +1101,94 @@ nextprev(WT_CURSOR *cursor, int next) return (ret); } +#if 0 +/* + * row_reserve -- + * Reserve a row in a row-store file. + */ +static int +row_reserve(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) +{ + WT_DECL_RET; + + if (!positioned) { + key_gen(key, keyno); + cursor->set_key(cursor, key); + } + + if (g.logging == LOG_OPS) + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, + "%-10s{%.*s}", "reserve", (int)key->size, key->data); + + switch (ret = cursor->reserve(cursor)) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: + return (WT_ROLLBACK); + case WT_NOTFOUND: + return (WT_NOTFOUND); + default: + testutil_die(ret, + "row_reserve: reserve row %" PRIu64 " by key", keyno); + } + return (0); +} + +/* + * col_reserve -- + * Reserve a row in a column-store file. + */ +static int +col_reserve(WT_CURSOR *cursor, uint64_t keyno, bool positioned) +{ + WT_DECL_RET; + + if (!positioned) + cursor->set_key(cursor, keyno); + + if (g.logging == LOG_OPS) + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, + "%-10s%" PRIu64, "reserve", keyno); + + switch (ret = cursor->reserve(cursor)) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: + return (WT_ROLLBACK); + case WT_NOTFOUND: + return (WT_NOTFOUND); + default: + testutil_die(ret, "col_reserve: %" PRIu64, keyno); + } + return (0); +} +#endif + /* * row_update -- * Update a row in a row-store file. 
*/ static int -row_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +row_update(TINFO *tinfo, WT_CURSOR *cursor, + WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) { WT_DECL_RET; - WT_SESSION *session; - session = cursor->session; + if (!positioned) { + key_gen(key, keyno); + cursor->set_key(cursor, key); + } + val_gen(&tinfo->rnd, value, keyno); + cursor->set_value(cursor, value); - /* Log the operation */ if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s{%.*s}, {%.*s}", "put", (int)key->size, key->data, (int)value->size, value->data); - cursor->set_key(cursor, key); - cursor->set_value(cursor, value); switch (ret = cursor->update(cursor)) { case 0: break; @@ -1086,32 +1214,32 @@ row_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) * Update a row in a column-store file. */ static int -col_update(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +col_update(TINFO *tinfo, WT_CURSOR *cursor, + WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) { WT_DECL_RET; - WT_SESSION *session; - session = cursor->session; + if (!positioned) + cursor->set_key(cursor, keyno); + val_gen(&tinfo->rnd, value, keyno); + if (g.type == FIX) + cursor->set_value(cursor, *(uint8_t *)value->data); + else + cursor->set_value(cursor, value); - /* Log the operation */ if (g.logging == LOG_OPS) { if (g.type == FIX) - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}", "update", keyno, ((uint8_t *)value->data)[0]); else - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s%" PRIu64 " {%.*s}", "update", keyno, (int)value->size, (char *)value->data); } - cursor->set_key(cursor, keyno); - if (g.type == FIX) - cursor->set_value(cursor, *(uint8_t *)value->data); - else - 
cursor->set_value(cursor, value); switch (ret = cursor->update(cursor)) { case 0: break; @@ -1238,22 +1366,29 @@ table_append(uint64_t keyno) * Insert a row in a row-store file. */ static int -row_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +row_insert(TINFO *tinfo, WT_CURSOR *cursor, + WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) { WT_DECL_RET; - WT_SESSION *session; - session = cursor->session; + /* + * If we positioned the cursor already, it's a test of an update using + * the insert method. Otherwise, generate a unique key and insert. + */ + if (!positioned) { + key_gen_insert(&tinfo->rnd, key, keyno); + cursor->set_key(cursor, key); + } + val_gen(&tinfo->rnd, value, keyno); + cursor->set_value(cursor, value); /* Log the operation */ if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s{%.*s}, {%.*s}", "insert", (int)key->size, key->data, (int)value->size, value->data); - cursor->set_key(cursor, key); - cursor->set_value(cursor, value); switch (ret = cursor->insert(cursor)) { case 0: break; @@ -1279,14 +1414,13 @@ row_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) * Insert an element in a column-store file. 
*/ static int -col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) +col_insert(TINFO *tinfo, + WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) { WT_DECL_RET; - WT_SESSION *session; uint64_t keyno; - session = cursor->session; - + val_gen(&tinfo->rnd, value, g.rows + 1); if (g.type == FIX) cursor->set_value(cursor, *(uint8_t *)value->data); else @@ -1307,12 +1441,12 @@ col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) if (g.logging == LOG_OPS) { if (g.type == FIX) - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}", "insert", keyno, ((uint8_t *)value->data)[0]); else - (void)g.wt_api->msg_printf(g.wt_api, session, + (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s%" PRIu64 " {%.*s}", "insert", keyno, (int)value->size, (char *)value->data); @@ -1335,21 +1469,19 @@ col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) * Remove an row from a row-store file. */ static int -row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) +row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) { WT_DECL_RET; - WT_SESSION *session; - session = cursor->session; - - key_gen(key, keyno); + if (!positioned) { + key_gen(key, keyno); + cursor->set_key(cursor, key); + } - /* Log the operation */ if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf( - g.wt_api, session, "%-10s%" PRIu64, "remove", keyno); + (void)g.wt_api->msg_printf(g.wt_api, + cursor->session, "%-10s%" PRIu64, "remove", keyno); - cursor->set_key(cursor, key); /* We use the cursor in overwrite mode, check for existence. */ if ((ret = cursor->search(cursor)) == 0) ret = cursor->remove(cursor); @@ -1385,19 +1517,17 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) * Remove a row from a column-store file. 
*/ static int -col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno) +col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) { WT_DECL_RET; - WT_SESSION *session; - session = cursor->session; + if (!positioned) + cursor->set_key(cursor, keyno); - /* Log the operation */ if (g.logging == LOG_OPS) - (void)g.wt_api->msg_printf( - g.wt_api, session, "%-10s%" PRIu64, "remove", keyno); + (void)g.wt_api->msg_printf(g.wt_api, + cursor->session, "%-10s%" PRIu64, "remove", keyno); - cursor->set_key(cursor, keyno); /* We use the cursor in overwrite mode, check for existence. */ if ((ret = cursor->search(cursor)) == 0) ret = cursor->remove(cursor); diff --git a/test/suite/test_truncate01.py b/test/suite/test_truncate01.py index 7d2b3862568..98b741ba6a4 100644 --- a/test/suite/test_truncate01.py +++ b/test/suite/test_truncate01.py @@ -128,6 +128,7 @@ class test_truncate_cursor_order(wttest.WiredTigerTestCase): msg = '/the start cursor position is after the stop cursor position/' self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.truncate(None, c1, c2, None), msg) + c1.set_key(ds.key(10)) c2.set_key(ds.key(20)) self.session.truncate(None, c1, c2, None) -- cgit v1.2.1 From 56fa32f25a0745b049789f31e7dd5128be9525a0 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 24 Mar 2017 07:52:59 -0400 Subject: WT-98 Update the current cursor value without a search (#3346) * WT-98 Update the current cursor value without a search When running in-memory and insert/update fails, we should expect WT_ROLLBACK even when not running inside a transaction. * Order the operations alphabetically (they were ordered the way they were because of the order in which we used to choose operations, but that's no longer the case). 
--- test/format/ops.c | 86 +++++++++++++++++++++++++++---------------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/test/format/ops.c b/test/format/ops.c index 05457ebb5a0..5309edf81c0 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -660,33 +660,6 @@ skip_checkpoint: /* Pick the next checkpoint operation. */ #endif /* Perform the operation. */ switch (op) { - case REMOVE: - switch (g.type) { - case ROW: - ret = - row_remove(cursor, key, keyno, positioned); - break; - case FIX: - case VAR: - ret = - col_remove(cursor, key, keyno, positioned); - break; - } - if (ret == 0) { - ++tinfo->remove; - /* - * Don't set positioned: it's unchanged from the - * previous state, but not necessarily set. - */ - if (SNAP_TRACK) - snap_track(snap++, keyno, NULL, NULL); - } else { - positioned = false; - if (ret == WT_ROLLBACK && intxn) - goto deadlock; - testutil_assert(ret == WT_NOTFOUND); - } - break; case INSERT: switch (g.type) { case ROW: @@ -718,7 +691,48 @@ skip_checkpoint: /* Pick the next checkpoint operation. */ } else { if (ret == WT_ROLLBACK && intxn) goto deadlock; - testutil_assert(ret == 0); + testutil_assert(ret == 0 || ret == WT_ROLLBACK); + } + break; + case READ: + ++tinfo->search; + ret = read_row(cursor, key, value, keyno); + if (ret == 0) { + positioned = true; + if (SNAP_TRACK) + snap_track(snap++, keyno, NULL, value); + } else { + positioned = false; + if (ret == WT_ROLLBACK && intxn) + goto deadlock; + testutil_assert(ret == WT_NOTFOUND); + } + break; + case REMOVE: + switch (g.type) { + case ROW: + ret = + row_remove(cursor, key, keyno, positioned); + break; + case FIX: + case VAR: + ret = + col_remove(cursor, key, keyno, positioned); + break; + } + if (ret == 0) { + ++tinfo->remove; + /* + * Don't set positioned: it's unchanged from the + * previous state, but not necessarily set. 
+ */ + if (SNAP_TRACK) + snap_track(snap++, keyno, NULL, NULL); + } else { + positioned = false; + if (ret == WT_ROLLBACK && intxn) + goto deadlock; + testutil_assert(ret == WT_NOTFOUND); } break; case UPDATE: @@ -745,21 +759,7 @@ update_instead_of_insert: positioned = false; if (ret == WT_ROLLBACK && intxn) goto deadlock; - testutil_assert(ret == 0); - } - break; - case READ: - ++tinfo->search; - ret = read_row(cursor, key, value, keyno); - if (ret == 0) { - positioned = true; - if (SNAP_TRACK) - snap_track(snap++, keyno, NULL, value); - } else { - positioned = false; - if (ret == WT_ROLLBACK && intxn) - goto deadlock; - testutil_assert(ret == WT_NOTFOUND); + testutil_assert(ret == 0 || ret == WT_ROLLBACK); } break; } -- cgit v1.2.1 From 1ceddd4a972bf220db9585739e9fcb283d618da4 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 24 Mar 2017 08:16:21 -0400 Subject: WT-3136 bug fix: WiredTiger doesn't check sprintf calls for error return (#3340) * WT-3136 bug fix: WiredTiger doesn't check sprintf calls for error return Make a pass through the source base to check sprintf, snprintf, vsprintf and vsnprintf calls for errors. * A WiredTiger key is a uint64_t. Use sizeof(), don't hard-wire buffer sizes into the code. * More (u_int) vs. (uint64_t) fixes. * Use CONFIG_APPEND instead of FORMAT_APPEND, it makes more sense. * revert part of 4475ae9, there's an explicit allocation of the size of the buffer. * MSVC complaints: test\format\config.c(765): warning C4018: '<': signed/unsigned mismatch test\format\config.c(765): warning C4018: '>': signed/unsigned mismatch * Change Windows testing shim to correctly use __wt_snprintf * Change Windows test shim to use the __wt_XXX functions * MSDN's _vscprintf API returns the number of characters excluding the terminating nul byte, return that value.
--- bench/wtperf/config.c | 11 +- bench/wtperf/idle_table_cycle.c | 4 +- bench/wtperf/misc.c | 4 +- bench/wtperf/track.c | 4 +- bench/wtperf/wtperf.c | 104 +++++++++--------- dist/filelist | 2 +- examples/c/ex_async.c | 6 +- examples/c/ex_backup.c | 28 ++--- examples/c/ex_encrypt.c | 4 +- examples/c/ex_log.c | 12 +-- examples/c/ex_sync.c | 20 ++-- src/block/block_ext.c | 4 +- src/bloom/bloom.c | 4 +- src/btree/bt_debug.c | 16 +-- src/config/config_api.c | 2 +- src/conn/conn_api.c | 9 +- src/cursor/cur_join.c | 15 +-- src/cursor/cur_json.c | 47 ++++---- src/cursor/cur_stat.c | 4 +- src/include/extern_posix.h | 3 +- src/include/extern_win.h | 3 +- src/include/misc.i | 91 ++++++++++++++++ src/include/os_windows.h | 22 ---- src/include/packing.i | 4 +- src/log/log.c | 7 +- src/lsm/lsm_stat.c | 4 +- src/os_common/filename.c | 4 +- src/os_common/os_errno.c | 2 +- src/os_common/os_fstream.c | 2 +- src/os_posix/os_snprintf.c | 27 +++++ src/os_posix/os_thread.c | 10 +- src/os_win/os_snprintf.c | 50 +++++++-- src/os_win/os_thread.c | 6 +- src/os_win/os_vsnprintf.c | 41 ------- src/schema/schema_create.c | 3 +- src/support/err.c | 119 ++++++++++---------- src/support/scratch.c | 9 +- src/utilities/util_backup.c | 15 +-- src/utilities/util_dump.c | 15 +-- src/utilities/util_load.c | 11 +- src/utilities/util_load_json.c | 23 ++-- src/utilities/util_main.c | 17 ++- src/utilities/util_misc.c | 5 +- src/utilities/util_stat.c | 5 +- src/utilities/util_verify.c | 7 +- src/utilities/util_write.c | 8 +- test/bloom/test_bloom.c | 4 +- test/checkpoint/checkpointer.c | 26 +++-- test/checkpoint/test_checkpoint.c | 4 +- test/checkpoint/workers.c | 22 ++-- test/csuite/wt1965_col_efficiency/main.c | 3 +- test/csuite/wt2246_col_append/main.c | 12 +-- test/csuite/wt2323_join_visibility/main.c | 34 +++--- test/csuite/wt2447_join_main_table/main.c | 12 ++- test/csuite/wt2592_join_schema/main.c | 9 +- test/csuite/wt2834_join_bloom_fix/main.c | 20 ++-- test/csuite/wt2853_perf/main.c | 15 +-- 
test/csuite/wt2909_checkpoint_integrity/main.c | 21 ++-- test/csuite/wt3120_filesys/main.c | 4 +- test/cursor_order/cursor_order.c | 8 +- test/cursor_order/cursor_order_file.c | 30 +++--- test/cursor_order/cursor_order_ops.c | 20 ++-- test/fops/file.c | 6 +- test/fops/t.c | 4 +- test/format/backup.c | 4 +- test/format/config.c | 23 ++-- test/format/ops.c | 10 +- test/format/rebalance.c | 16 +-- test/format/salvage.c | 25 +++-- test/format/util.c | 51 +++++---- test/format/wts.c | 143 ++++++++++++------------- test/manydbs/manydbs.c | 3 +- test/readonly/readonly.c | 45 ++++---- test/recovery/random-abort.c | 20 ++-- test/recovery/truncated-log.c | 14 +-- test/salvage/salvage.c | 26 ++--- test/thread/file.c | 35 +++--- test/thread/rw.c | 41 ++++--- test/thread/stats.c | 3 +- test/thread/t.c | 8 +- test/utility/misc.c | 8 +- test/utility/parse_opts.c | 6 +- test/utility/thread.c | 7 +- test/windows/windows_shim.h | 9 +- 84 files changed, 893 insertions(+), 671 deletions(-) create mode 100644 src/os_posix/os_snprintf.c delete mode 100644 src/os_win/os_vsnprintf.c diff --git a/bench/wtperf/config.c b/bench/wtperf/config.c index 9eea99eeec4..e4eee66e4cb 100644 --- a/bench/wtperf/config.c +++ b/bench/wtperf/config.c @@ -438,14 +438,13 @@ config_opt(WTPERF *wtperf, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v) return (EINVAL); } strp = (char **)valueloc; - newlen = v->len + 1; if (*strp == NULL) begin = newstr = dstrdup(v->str); else { - newlen += strlen(*strp) + 1; - newstr = dcalloc(newlen, sizeof(char)); - snprintf(newstr, newlen, - "%s,%*s", *strp, (int)v->len, v->str); + newlen = strlen(*strp) + v->len + strlen(",") + 1; + newstr = dmalloc(newlen); + testutil_check(__wt_snprintf(newstr, newlen, + "%s,%.*s", *strp, (int)v->len, v->str)); /* Free the old value now we've copied it. 
*/ free(*strp); begin = &newstr[(newlen - 1) - v->len]; @@ -712,7 +711,7 @@ config_opt_name_value(WTPERF *wtperf, const char *name, const char *value) /* name="value" */ len = strlen(name) + strlen(value) + 4; optstr = dmalloc(len); - snprintf(optstr, len, "%s=\"%s\"", name, value); + testutil_check(__wt_snprintf(optstr, len, "%s=\"%s\"", name, value)); ret = config_opt_str(wtperf, optstr); free(optstr); return (ret); diff --git a/bench/wtperf/idle_table_cycle.c b/bench/wtperf/idle_table_cycle.c index bb44cfbde59..4387860cfb2 100644 --- a/bench/wtperf/idle_table_cycle.c +++ b/bench/wtperf/idle_table_cycle.c @@ -80,8 +80,8 @@ cycle_idle_tables(void *arg) } for (cycle_count = 0; wtperf->idle_cycle_run; ++cycle_count) { - snprintf(uri, sizeof(uri), - "%s_cycle%07d", wtperf->uris[0], cycle_count); + testutil_check(__wt_snprintf(uri, sizeof(uri), + "%s_cycle%07d", wtperf->uris[0], cycle_count)); /* Don't busy cycle in this loop. */ __wt_sleep(1, 0); diff --git a/bench/wtperf/misc.c b/bench/wtperf/misc.c index 24b3323a49a..0874794e01e 100644 --- a/bench/wtperf/misc.c +++ b/bench/wtperf/misc.c @@ -46,8 +46,8 @@ setup_log_file(WTPERF *wtperf) len = strlen(wtperf->monitor_dir) + strlen(opts->table_name) + strlen(".stat") + 2; fname = dmalloc(len); - snprintf(fname, len, - "%s/%s.stat", wtperf->monitor_dir, opts->table_name); + testutil_check(__wt_snprintf(fname, len, + "%s/%s.stat", wtperf->monitor_dir, opts->table_name)); if ((wtperf->logf = fopen(fname, "w")) == NULL) { ret = errno; fprintf(stderr, "%s: %s\n", fname, strerror(ret)); diff --git a/bench/wtperf/track.c b/bench/wtperf/track.c index 822bdaa4b4a..86a26120a6a 100644 --- a/bench/wtperf/track.c +++ b/bench/wtperf/track.c @@ -288,8 +288,8 @@ latency_print_single(WTPERF *wtperf, TRACK *total, const char *name) uint64_t cumops; char path[1024]; - snprintf(path, sizeof(path), - "%s/latency.%s", wtperf->monitor_dir, name); + testutil_check(__wt_snprintf(path, sizeof(path), + "%s/latency.%s", wtperf->monitor_dir, 
name)); if ((fp = fopen(path, "w")) == NULL) { lprintf(wtperf, errno, 0, "%s", path); return; diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 772dedac8c8..1eedaba4f32 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -551,7 +551,8 @@ worker(void *arg) goto err; } for (i = 0; i < opts->table_count_idle; i++) { - snprintf(buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i); + testutil_check(__wt_snprintf( + buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i)); if ((ret = session->open_cursor( session, buf, NULL, NULL, &tmp_cursor)) != 0) { lprintf(wtperf, ret, 0, @@ -1297,7 +1298,8 @@ monitor(void *arg) /* Open the logging file. */ len = strlen(wtperf->monitor_dir) + 100; path = dmalloc(len); - snprintf(path, len, "%s/monitor", wtperf->monitor_dir); + testutil_check(__wt_snprintf( + path, len, "%s/monitor", wtperf->monitor_dir)); if ((fp = fopen(path, "w")) == NULL) { lprintf(wtperf, errno, 0, "%s", path); goto err; @@ -1937,19 +1939,19 @@ create_uris(WTPERF *wtperf) /* If there is only one table, just use the base name. */ wtperf->uris[i] = dmalloc(len); if (opts->table_count == 1) - snprintf(wtperf->uris[i], - len, "table:%s", opts->table_name); + testutil_check(__wt_snprintf(wtperf->uris[i], + len, "table:%s", opts->table_name)); else - snprintf(wtperf->uris[i], - len, "table:%s%05d", opts->table_name, i); + testutil_check(__wt_snprintf(wtperf->uris[i], + len, "table:%s%05d", opts->table_name, i)); } /* Create the log-like-table URI. 
*/ len = strlen("table:") + strlen(opts->table_name) + strlen("_log_table") + 1; wtperf->log_table_uri = dmalloc(len); - snprintf( - wtperf->log_table_uri, len, "table:%s_log_table", opts->table_name); + testutil_check(__wt_snprintf(wtperf->log_table_uri, + len, "table:%s_log_table", opts->table_name)); } static int @@ -1971,7 +1973,8 @@ create_tables(WTPERF *wtperf) } for (i = 0; i < opts->table_count_idle; i++) { - snprintf(buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i); + testutil_check(__wt_snprintf( + buf, 512, "%s_idle%05d", wtperf->uris[0], (int)i)); if ((ret = session->create( session, buf, opts->table_config)) != 0) { lprintf(wtperf, ret, 0, @@ -2000,8 +2003,9 @@ create_tables(WTPERF *wtperf) return (ret); } if (opts->index) { - snprintf(buf, 512, "index:%s:val_idx", - wtperf->uris[i] + strlen("table:")); + testutil_check(__wt_snprintf(buf, 512, + "index:%s:val_idx", + wtperf->uris[i] + strlen("table:"))); if ((ret = session->create( session, buf, "columns=(val)")) != 0) { lprintf(wtperf, ret, 0, @@ -2186,15 +2190,15 @@ start_all_runs(WTPERF *wtperf) */ len = strlen(wtperf->home) + 5; next_wtperf->home = dmalloc(len); - snprintf( - next_wtperf->home, len, "%s/D%02d", wtperf->home, (int)i); + testutil_check(__wt_snprintf( + next_wtperf->home, len, "%s/D%02d", wtperf->home, (int)i)); if (opts->create != 0) recreate_dir(next_wtperf->home); len = strlen(wtperf->monitor_dir) + 5; next_wtperf->monitor_dir = dmalloc(len); - snprintf(next_wtperf->monitor_dir, - len, "%s/D%02d", wtperf->monitor_dir, (int)i); + testutil_check(__wt_snprintf(next_wtperf->monitor_dir, + len, "%s/D%02d", wtperf->monitor_dir, (int)i)); if (opts->create != 0 && strcmp(next_wtperf->home, next_wtperf->monitor_dir) != 0) recreate_dir(next_wtperf->monitor_dir); @@ -2543,9 +2547,9 @@ main(int argc, char *argv[]) */ req_len = strlen(",async=(enabled=true,threads=)") + 4; wtperf->async_config = dmalloc(req_len); - snprintf(wtperf->async_config, req_len, + 
testutil_check(__wt_snprintf(wtperf->async_config, req_len, ",async=(enabled=true,threads=%" PRIu32 ")", - opts->async_threads); + opts->async_threads)); } if ((ret = config_compress(wtperf)) != 0) goto err; @@ -2578,10 +2582,10 @@ main(int argc, char *argv[]) sreq_len = strlen("session_max=") + 6; req_len += sreq_len; sess_cfg = dmalloc(sreq_len); - snprintf(sess_cfg, sreq_len, + testutil_check(__wt_snprintf(sess_cfg, sreq_len, "session_max=%" PRIu32, opts->session_count_idle + - wtperf->workers_cnt + opts->populate_threads + 10); + wtperf->workers_cnt + opts->populate_threads + 10)); } req_len += opts->in_memory ? strlen("in_memory=true") : 0; req_len += user_cconfig != NULL ? strlen(user_cconfig) : 0; @@ -2591,16 +2595,16 @@ main(int argc, char *argv[]) append_comma = ""; if (wtperf->async_config != NULL && strlen(wtperf->async_config) != 0) { - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", - append_comma, wtperf->async_config); + testutil_check(__wt_snprintf_len_incr( + cc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, wtperf->async_config)); append_comma = ","; } if (wtperf->compress_ext != NULL && strlen(wtperf->compress_ext) != 0) { - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", - append_comma, wtperf->compress_ext); + testutil_check(__wt_snprintf_len_incr( + cc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, wtperf->compress_ext)); append_comma = ","; } if (opts->in_memory) { @@ -2610,15 +2614,15 @@ main(int argc, char *argv[]) append_comma = ","; } if (sess_cfg != NULL && strlen(sess_cfg) != 0) { - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", - append_comma, sess_cfg); + testutil_check(__wt_snprintf_len_incr( + cc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, sess_cfg)); append_comma = ","; } if (user_cconfig != NULL && strlen(user_cconfig) != 0) { - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", - append_comma, user_cconfig); + 
testutil_check(__wt_snprintf_len_incr( + cc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, user_cconfig)); append_comma = ","; } @@ -2639,21 +2643,21 @@ main(int argc, char *argv[]) append_comma = ""; if (wtperf->compress_table != NULL && strlen(wtperf->compress_table) != 0) { - pos += (size_t)snprintf( - tc_buf + pos, req_len - pos, "%s%s", - append_comma, wtperf->compress_table); + testutil_check(__wt_snprintf_len_incr( + tc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, wtperf->compress_table)); append_comma = ","; } if (opts->index) { - pos += (size_t)snprintf( - tc_buf + pos, req_len - pos, "%s%s", - append_comma, INDEX_COL_NAMES); + testutil_check(__wt_snprintf_len_incr( + tc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, INDEX_COL_NAMES)); append_comma = ","; } if (user_tconfig != NULL && strlen(user_tconfig) != 0) { - pos += (size_t)snprintf( - tc_buf + pos, req_len - pos, "%s%s", - append_comma, user_tconfig); + testutil_check(__wt_snprintf_len_incr( + tc_buf + pos, req_len - pos, &pos, "%s%s", + append_comma, user_tconfig)); append_comma = ","; } @@ -2665,8 +2669,9 @@ main(int argc, char *argv[]) req_len = strlen(opts->table_config) + strlen(LOG_PARTIAL_CONFIG) + 1; wtperf->partial_config = dmalloc(req_len); - snprintf(wtperf->partial_config, req_len, "%s%s", - opts->table_config, LOG_PARTIAL_CONFIG); + testutil_check(__wt_snprintf( + wtperf->partial_config, req_len, "%s%s", + opts->table_config, LOG_PARTIAL_CONFIG)); } /* * Set the config for reopen. If readonly add in that string. 
@@ -2679,11 +2684,12 @@ main(int argc, char *argv[]) req_len = strlen(opts->conn_config) + 1; wtperf->reopen_config = dmalloc(req_len); if (opts->readonly) - snprintf(wtperf->reopen_config, req_len, "%s%s", - opts->conn_config, READONLY_CONFIG); + testutil_check(__wt_snprintf( + wtperf->reopen_config, req_len, "%s%s", + opts->conn_config, READONLY_CONFIG)); else - snprintf(wtperf->reopen_config, - req_len, "%s", opts->conn_config); + testutil_check(__wt_snprintf( + wtperf->reopen_config, req_len, "%s", opts->conn_config)); /* Sanity-check the configuration. */ if ((ret = config_sanity(wtperf)) != 0) @@ -2696,7 +2702,8 @@ main(int argc, char *argv[]) /* Write a copy of the config. */ req_len = strlen(wtperf->home) + strlen("/CONFIG.wtperf") + 1; path = dmalloc(req_len); - snprintf(path, req_len, "%s/CONFIG.wtperf", wtperf->home); + testutil_check(__wt_snprintf( + path, req_len, "%s/CONFIG.wtperf", wtperf->home)); config_opt_log(opts, path); free(path); @@ -2821,7 +2828,8 @@ recreate_dir(const char *name) len = strlen(name) * 2 + 100; buf = dmalloc(len); - (void)snprintf(buf, len, "rm -rf %s && mkdir %s", name, name); + testutil_check(__wt_snprintf( + buf, len, "rm -rf %s && mkdir %s", name, name)); testutil_checkfmt(system(buf), "system: %s", buf); free(buf); } diff --git a/dist/filelist b/dist/filelist index 3886035eaa9..5a3348b940a 100644 --- a/dist/filelist +++ b/dist/filelist @@ -133,6 +133,7 @@ src/os_posix/os_path.c POSIX_HOST src/os_posix/os_priv.c POSIX_HOST src/os_posix/os_setvbuf.c POSIX_HOST src/os_posix/os_sleep.c POSIX_HOST +src/os_posix/os_snprintf.c POSIX_HOST src/os_posix/os_thread.c POSIX_HOST src/os_posix/os_time.c POSIX_HOST src/os_posix/os_yield.c POSIX_HOST @@ -152,7 +153,6 @@ src/os_win/os_snprintf.c WINDOWS_HOST src/os_win/os_thread.c WINDOWS_HOST src/os_win/os_time.c WINDOWS_HOST src/os_win/os_utf8.c WINDOWS_HOST -src/os_win/os_vsnprintf.c WINDOWS_HOST src/os_win/os_winerr.c WINDOWS_HOST src/os_win/os_yield.c WINDOWS_HOST 
src/packing/pack_api.c diff --git a/examples/c/ex_async.c b/examples/c/ex_async.c index f7531a5c3d8..5cfafca0418 100644 --- a/examples/c/ex_async.c +++ b/examples/c/ex_async.c @@ -170,12 +170,12 @@ main(void) * an asynchronous insert. */ /*! [async set the operation's string key] */ - snprintf(k[i], sizeof(k), "key%d", i); + (void)snprintf(k[i], sizeof(k), "key%d", i); op->set_key(op, k[i]); /*! [async set the operation's string key] */ /*! [async set the operation's string value] */ - snprintf(v[i], sizeof(v), "value%d", i); + (void)snprintf(v[i], sizeof(v), "value%d", i); op->set_value(op, v[i]); /*! [async set the operation's string value] */ @@ -218,7 +218,7 @@ main(void) * Set the operation's string key and value, and then do * an asynchronous search. */ - snprintf(k[i], sizeof(k), "key%d", i); + (void)snprintf(k[i], sizeof(k), "key%d", i); op->set_key(op, k[i]); ret = op->search(op); /*! [async search] */ diff --git a/examples/c/ex_backup.c b/examples/c/ex_backup.c index 0697cbb3458..83cc9b22ecc 100644 --- a/examples/c/ex_backup.c +++ b/examples/c/ex_backup.c @@ -96,7 +96,7 @@ compare_backups(int i) if (i == 0) (void)strncpy(msg, "MAIN", sizeof(msg)); else - snprintf(msg, sizeof(msg), "%d", i); + (void)snprintf(msg, sizeof(msg), "%d", i); printf( "Iteration %s: Tables %s.%d and %s.%d %s\n", msg, full_out, i, incr_out, i, ret == 0 ? "identical" : "differ"); @@ -131,8 +131,8 @@ setup_directories(void) * For incremental backups we need 0-N. The 0 incremental * directory will compare with the original at the end. */ - snprintf(buf, sizeof(buf), "rm -rf %s.%d && mkdir %s.%d", - home_incr, i, home_incr, i); + (void)snprintf(buf, sizeof(buf), + "rm -rf %s.%d && mkdir %s.%d", home_incr, i, home_incr, i); if ((ret = system(buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", buf, ret); return (ret); @@ -142,8 +142,8 @@ setup_directories(void) /* * For full backups we need 1-N. 
*/ - snprintf(buf, sizeof(buf), "rm -rf %s.%d && mkdir %s.%d", - home_full, i, home_full, i); + (void)snprintf(buf, sizeof(buf), + "rm -rf %s.%d && mkdir %s.%d", home_full, i, home_full, i); if ((ret = system(buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", buf, ret); return (ret); @@ -164,8 +164,8 @@ add_work(WT_SESSION *session, int iter) * Perform some operations with individual auto-commit transactions. */ for (i = 0; i < MAX_KEYS; i++) { - snprintf(k, sizeof(k), "key.%d.%d", iter, i); - snprintf(v, sizeof(v), "value.%d.%d", iter, i); + (void)snprintf(k, sizeof(k), "key.%d.%d", iter, i); + (void)snprintf(v, sizeof(v), "value.%d.%d", iter, i); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); @@ -187,7 +187,7 @@ take_full_backup(WT_SESSION *session, int i) * directories. Otherwise only into the appropriate full directory. */ if (i != 0) { - snprintf(h, sizeof(h), "%s.%d", home_full, i); + (void)snprintf(h, sizeof(h), "%s.%d", home_full, i); hdir = h; } else hdir = home_incr; @@ -200,14 +200,15 @@ take_full_backup(WT_SESSION *session, int i) * Take a full backup into each incremental directory. */ for (j = 0; j < MAX_ITERATIONS; j++) { - snprintf(h, sizeof(h), "%s.%d", home_incr, j); + (void)snprintf(h, sizeof(h), + "%s.%d", home_incr, j); (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename); ret = system(buf); } else { - snprintf(h, sizeof(h), "%s.%d", home_full, i); + (void)snprintf(h, sizeof(h), "%s.%d", home_full, i); (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, hdir, filename); ret = system(buf); @@ -237,12 +238,12 @@ take_incr_backup(WT_SESSION *session, int i) * Copy into the 0 incremental directory and then each of the * incremental directories for this iteration and later. 
*/ - snprintf(h, sizeof(h), "%s.0", home_incr); + (void)snprintf(h, sizeof(h), "%s.0", home_incr); (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename); ret = system(buf); for (j = i; j < MAX_ITERATIONS; j++) { - snprintf(h, sizeof(h), "%s.%d", home_incr, j); + (void)snprintf(h, sizeof(h), "%s.%d", home_incr, j); (void)snprintf(buf, sizeof(buf), "cp %s/%s %s/%s", home, filename, h, filename); ret = system(buf); @@ -270,7 +271,8 @@ main(void) int i, ret; char cmd_buf[256]; - snprintf(cmd_buf, sizeof(cmd_buf), "rm -rf %s && mkdir %s", home, home); + (void)snprintf(cmd_buf, sizeof(cmd_buf), + "rm -rf %s && mkdir %s", home, home); if ((ret = system(cmd_buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", cmd_buf, ret); return (EXIT_FAILURE); diff --git a/examples/c/ex_encrypt.c b/examples/c/ex_encrypt.c index 00dc66fc24d..1520bd286cd 100644 --- a/examples/c/ex_encrypt.c +++ b/examples/c/ex_encrypt.c @@ -507,12 +507,12 @@ main(void) * we decrypt on read. */ for (i = 0; i < MAX_KEYS; i++) { - snprintf(keybuf, sizeof(keybuf), "key%d", i); + (void)snprintf(keybuf, sizeof(keybuf), "key%d", i); c1->set_key(c1, keybuf); c2->set_key(c2, keybuf); nc->set_key(nc, keybuf); - snprintf(valbuf, sizeof(valbuf), "value%d", i); + (void)snprintf(valbuf, sizeof(valbuf), "value%d", i); c1->set_value(c1, valbuf); c2->set_value(c2, valbuf); nc->set_value(nc, valbuf); diff --git a/examples/c/ex_log.c b/examples/c/ex_log.c index fdbc39412ae..0d8fbf97233 100644 --- a/examples/c/ex_log.c +++ b/examples/c/ex_log.c @@ -291,8 +291,8 @@ main(void) char cmd_buf[256], k[16], v[16]; count_min = 0; - snprintf(cmd_buf, sizeof(cmd_buf), "rm -rf %s %s && mkdir %s %s", - home1, home2, home1, home2); + (void)snprintf(cmd_buf, sizeof(cmd_buf), + "rm -rf %s %s && mkdir %s %s", home1, home2, home1, home2); if ((ret = system(cmd_buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", cmd_buf, ret); return (EXIT_FAILURE); @@ -312,8 +312,8 @@ main(void) * Perform some operations with 
individual auto-commit transactions. */ for (record_count = 0, i = 0; i < MAX_KEYS; i++, record_count++) { - snprintf(k, sizeof(k), "key%d", i); - snprintf(v, sizeof(v), "value%d", i); + (void)snprintf(k, sizeof(k), "key%d", i); + (void)snprintf(v, sizeof(v), "value%d", i); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); @@ -324,8 +324,8 @@ main(void) * Perform some operations within a single transaction. */ for (i = MAX_KEYS; i < MAX_KEYS+5; i++, record_count++) { - snprintf(k, sizeof(k), "key%d", i); - snprintf(v, sizeof(v), "value%d", i); + (void)snprintf(k, sizeof(k), "key%d", i); + (void)snprintf(v, sizeof(v), "value%d", i); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); diff --git a/examples/c/ex_sync.c b/examples/c/ex_sync.c index 2c610b1e570..b2d74b52f7f 100644 --- a/examples/c/ex_sync.c +++ b/examples/c/ex_sync.c @@ -59,8 +59,8 @@ main(void) char cmd_buf[256], k[16], v[16]; const char *conf; - snprintf(cmd_buf, sizeof(cmd_buf), "rm -rf %s && mkdir %s", - home, home); + (void)snprintf(cmd_buf, sizeof(cmd_buf), + "rm -rf %s && mkdir %s", home, home); if ((ret = system(cmd_buf)) != 0) { fprintf(stderr, "%s: failed ret %d\n", cmd_buf, ret); return (EXIT_FAILURE); @@ -98,8 +98,8 @@ main(void) ret = session->commit_transaction(session, conf); ret = session->begin_transaction(session, NULL); } - snprintf(k, sizeof(k), "key%d", i); - snprintf(v, sizeof(v), "value%d", i); + (void)snprintf(k, sizeof(k), "key%d", i); + (void)snprintf(v, sizeof(v), "value%d", i); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); @@ -113,8 +113,8 @@ main(void) * Perform some operations within a single transaction. 
*/ for (i = MAX_KEYS; i < MAX_KEYS+5; i++, record_count++) { - snprintf(k, sizeof(k), "key%d", i); - snprintf(v, sizeof(v), "value%d", i); + (void)snprintf(k, sizeof(k), "key%d", i); + (void)snprintf(v, sizeof(v), "value%d", i); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); @@ -129,8 +129,8 @@ main(void) * Demonstrate using log_flush to force the log to disk. */ for (i = 0; i < MAX_KEYS; i++, record_count++) { - snprintf(k, sizeof(k), "key%d", record_count); - snprintf(v, sizeof(v), "value%d", record_count); + (void)snprintf(k, sizeof(k), "key%d", record_count); + (void)snprintf(v, sizeof(v), "value%d", record_count); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); @@ -138,8 +138,8 @@ main(void) ret = session->log_flush(session, "sync=on"); for (i = 0; i < MAX_KEYS; i++, record_count++) { - snprintf(k, sizeof(k), "key%d", record_count); - snprintf(v, sizeof(v), "value%d", record_count); + (void)snprintf(k, sizeof(k), "key%d", record_count); + (void)snprintf(v, sizeof(v), "value%d", record_count); cursor->set_key(cursor, k); cursor->set_value(cursor, v); ret = cursor->insert(cursor); diff --git a/src/block/block_ext.c b/src/block/block_ext.c index e9357d73d1d..da7a06d873d 100644 --- a/src/block/block_ext.c +++ b/src/block/block_ext.c @@ -1378,8 +1378,8 @@ __wt_block_extlist_init(WT_SESSION_IMPL *session, size = (name == NULL ? 0 : strlen(name)) + strlen(".") + (extname == NULL ? 0 : strlen(extname) + 1); WT_RET(__wt_calloc_def(session, size, &el->name)); - (void)snprintf(el->name, size, "%s.%s", - name == NULL ? "" : name, extname == NULL ? "" : extname); + WT_RET(__wt_snprintf(el->name, size, "%s.%s", + name == NULL ? "" : name, extname == NULL ? 
"" : extname)); el->offset = WT_BLOCK_INVALID_OFFSET; el->track_size = track_size; diff --git a/src/bloom/bloom.c b/src/bloom/bloom.c index be3230437d3..b8d75678835 100644 --- a/src/bloom/bloom.c +++ b/src/bloom/bloom.c @@ -37,8 +37,8 @@ __bloom_init(WT_SESSION_IMPL *session, len += strlen(config); WT_ERR(__wt_calloc_def(session, len, &bloom->config)); /* Add the standard config at the end, so it overrides user settings. */ - (void)snprintf(bloom->config, len, - "%s,%s", config == NULL ? "" : config, WT_BLOOM_TABLE_CONFIG); + WT_ERR(__wt_snprintf(bloom->config, len, + "%s,%s", config == NULL ? "" : config, WT_BLOOM_TABLE_CONFIG)); bloom->session = session; diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index 4989301468f..d3f02e29b90 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -64,7 +64,7 @@ __wt_debug_set_verbose(WT_SESSION_IMPL *session, const char *v) const char *cfg[2] = { NULL, NULL }; char buf[256]; - snprintf(buf, sizeof(buf), "verbose=[%s]", v); + WT_RET(__wt_snprintf(buf, sizeof(buf), "verbose=[%s]", v)); cfg[0] = buf; return (__wt_verbose_config(session, cfg)); } @@ -87,6 +87,7 @@ __debug_hex_byte(WT_DBG *ds, uint8_t v) static int __dmsg_event(WT_DBG *ds, const char *fmt, ...) { + WT_DECL_RET; WT_ITEM *msg; WT_SESSION_IMPL *session; size_t len, space; @@ -107,8 +108,9 @@ __dmsg_event(WT_DBG *ds, const char *fmt, ...) p = (char *)msg->mem + msg->size; space = msg->memsize - msg->size; va_start(ap, fmt); - len = (size_t)vsnprintf(p, space, fmt, ap); + ret = __wt_vsnprintf_len_set(p, space, &len, fmt, ap); va_end(ap); + WT_RET(ret); /* Check if there was enough space. 
*/ if (len < space) { @@ -447,13 +449,14 @@ __debug_tree_shape_info(WT_PAGE *page) v = page->memory_footprint; if (v >= WT_GIGABYTE) - snprintf(buf, sizeof(buf), + (void)__wt_snprintf(buf, sizeof(buf), "(%p %" PRIu64 "G)", (void *)page, v / WT_GIGABYTE); else if (v >= WT_MEGABYTE) - snprintf(buf, sizeof(buf), + (void)__wt_snprintf(buf, sizeof(buf), "(%p %" PRIu64 "M)", (void *)page, v / WT_MEGABYTE); else - snprintf(buf, sizeof(buf), "(%p %" PRIu64 ")", (void *)page, v); + (void)__wt_snprintf(buf, sizeof(buf), + "(%p %" PRIu64 ")", (void *)page, v); return (buf); } @@ -838,7 +841,8 @@ __debug_page_col_var(WT_DBG *ds, WT_REF *ref) __wt_cell_unpack(cell, unpack); rle = __wt_cell_rle(unpack); } - snprintf(tag, sizeof(tag), "%" PRIu64 " %" PRIu64, recno, rle); + WT_RET(__wt_snprintf( + tag, sizeof(tag), "%" PRIu64 " %" PRIu64, recno, rle)); WT_RET( __debug_cell_data(ds, page, WT_PAGE_COL_VAR, tag, unpack)); diff --git a/src/config/config_api.c b/src/config/config_api.c index 9f70ba65e9b..88e173459f9 100644 --- a/src/config/config_api.c +++ b/src/config/config_api.c @@ -278,7 +278,7 @@ __wt_configure_method(WT_SESSION_IMPL *session, entry->method = (*epp)->method; len = strlen((*epp)->base) + strlen(",") + strlen(config) + 1; WT_ERR(__wt_calloc_def(session, len, &p)); - snprintf(p, len, "%s,%s", (*epp)->base, config); + WT_ERR(__wt_snprintf(p, len, "%s,%s", (*epp)->base, config)); entry->base = p; /* diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 124250a7a7d..68d45678965 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1662,8 +1662,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR_MSG(session, EINVAL, "Creating a new database is incompatible with " "read-only configuration"); - len = (size_t)snprintf(buf, sizeof(buf), - "%s\n%s\n", WT_WIREDTIGER, WIREDTIGER_VERSION_STRING); + WT_ERR(__wt_snprintf_len_set(buf, sizeof(buf), &len, + "%s\n%s\n", WT_WIREDTIGER, WIREDTIGER_VERSION_STRING)); WT_ERR(__wt_write(session, fh, 
(wt_off_t)0, len, buf)); WT_ERR(__wt_fsync(session, fh, true)); } else { @@ -2250,10 +2250,9 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR(__wt_scr_alloc(session, 0, &i3)); cfg[0] = WT_CONFIG_BASE(session, wiredtiger_open_all); cfg[1] = NULL; - WT_ERR_TEST(snprintf(version, sizeof(version), + WT_ERR(__wt_snprintf(version, sizeof(version), "version=(major=%d,minor=%d)", - WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR) >= - (int)sizeof(version), ENOMEM); + WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR)); __conn_config_append(cfg, version); /* Ignore the base_config file if config_base_set is false. */ diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 6135132601b..80afaf798dc 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -185,7 +185,7 @@ __curjoin_iter_set_entry(WT_CURSOR_JOIN_ITER *iter, u_int entry_pos) size = strlen(to_dup->internal_uri) + 3; WT_ERR(__wt_calloc(session, size, 1, &uri)); - snprintf(uri, size, "%s()", to_dup->internal_uri); + WT_ERR(__wt_snprintf(uri, size, "%s()", to_dup->internal_uri)); if ((c = iter->cursor) == NULL || !WT_STREQ(c->uri, uri)) { iter->cursor = NULL; if (c != NULL) @@ -929,7 +929,7 @@ __curjoin_init_next(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, if ((proj = cjoin->projection) != NULL) { size = strlen(urimain) + strlen(proj) + 1; WT_ERR(__wt_calloc(session, size, 1, &mainbuf)); - snprintf(mainbuf, size, "%s%s", urimain, proj); + WT_ERR(__wt_snprintf(mainbuf, size, "%s%s", urimain, proj)); urimain = mainbuf; } WT_ERR(__wt_open_cursor(session, urimain, (WT_CURSOR *)cjoin, config, @@ -1148,8 +1148,8 @@ __curjoin_open_main(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, newsize = strlen(cjoin->table->name) + idx->colconf.len + 1; WT_ERR(__wt_calloc(session, 1, newsize, &main_uri)); - snprintf(main_uri, newsize, "%s%.*s", - cjoin->table->name, (int)idx->colconf.len, idx->colconf.str); + WT_ERR(__wt_snprintf(main_uri, newsize, "%s%.*s", + 
cjoin->table->name, (int)idx->colconf.len, idx->colconf.str)); WT_ERR(__wt_open_cursor(session, main_uri, (WT_CURSOR *)cjoin, raw_cfg, &entry->main)); if (idx->extractor == NULL) { @@ -1162,7 +1162,8 @@ __curjoin_open_main(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, */ len = strlen(entry->main->value_format) + 3; WT_ERR(__wt_calloc(session, len, 1, &newformat)); - snprintf(newformat, len, "%s0x", entry->main->value_format); + WT_ERR(__wt_snprintf( + newformat, len, "%s0x", entry->main->value_format)); __wt_free(session, entry->main->value_format); entry->main->value_format = newformat; } @@ -1531,8 +1532,8 @@ __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, len = strlen(cindex->iface.key_format) + 3; WT_RET(__wt_calloc(session, len, 1, &entry->repack_format)); - snprintf(entry->repack_format, len, "%s0x", - cindex->iface.key_format); + WT_RET(__wt_snprintf(entry->repack_format, + len, "%s0x", cindex->iface.key_format)); } } return (0); diff --git a/src/cursor/cur_json.c b/src/cursor/cur_json.c index 0ad3c4f4201..e8ddb767863 100644 --- a/src/cursor/cur_json.c +++ b/src/cursor/cur_json.c @@ -8,8 +8,8 @@ #include "wt_internal.h" -static size_t __json_unpack_put(WT_SESSION_IMPL *, void *, u_char *, size_t, - WT_CONFIG_ITEM *); +static int __json_unpack_put( + WT_SESSION_IMPL *, void *, u_char *, size_t, WT_CONFIG_ITEM *, size_t *); static inline int __json_struct_size(WT_SESSION_IMPL *, const void *, size_t, const char *, WT_CONFIG_ITEM *, bool, size_t *); static inline int __json_struct_unpackv(WT_SESSION_IMPL *, const void *, size_t, @@ -61,22 +61,22 @@ static int __json_pack_size(WT_SESSION_IMPL *, const char *, WT_CONFIG_ITEM *, * __json_unpack_put -- * Calculate the size of a packed byte string as formatted for JSON. 
*/ -static size_t +static int __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv, - u_char *buf, size_t bufsz, WT_CONFIG_ITEM *name) + u_char *buf, size_t bufsz, WT_CONFIG_ITEM *name, size_t *retsizep) { WT_PACK_VALUE *pv; const u_char *p, *end; size_t s, n; pv = (WT_PACK_VALUE *)voidpv; - s = (size_t)snprintf((char *)buf, bufsz, "\"%.*s\" : ", - (int)name->len, name->str); + + WT_RET(__wt_snprintf_len_set( + (char *)buf, bufsz, &s, "\"%.*s\" : ", (int)name->len, name->str)); if (s <= bufsz) { bufsz -= s; buf += s; - } - else + } else bufsz = 0; switch (pv->type) { @@ -118,7 +118,8 @@ __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv, } if (bufsz > 0) *buf++ = '"'; - return (s); + *retsizep += s; + return (0); case 'U': case 'u': s += 2; @@ -140,14 +141,17 @@ __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv, } if (bufsz > 0) *buf++ = '"'; - return (s); + *retsizep += s; + return (0); case 'b': case 'h': case 'i': case 'l': case 'q': - return (s + - (size_t)snprintf((char *)buf, bufsz, "%" PRId64, pv->u.i)); + WT_RET(__wt_snprintf_len_incr( + (char *)buf, bufsz, &s, "%" PRId64, pv->u.i)); + *retsizep += s; + return (0); case 'B': case 't': case 'H': @@ -156,11 +160,14 @@ __json_unpack_put(WT_SESSION_IMPL *session, void *voidpv, case 'Q': case 'r': case 'R': - return (s + - (size_t)snprintf((char *)buf, bufsz, "%" PRId64, pv->u.u)); + WT_RET(__wt_snprintf_len_incr( + (char *)buf, bufsz, &s, "%" PRId64, pv->u.u)); + *retsizep += s; + return (0); } - __wt_err(session, EINVAL, "unknown pack-value type: %c", (int)pv->type); - return ((size_t)-1); + + WT_RET_MSG(session, EINVAL, + "unknown pack-value type: %c", (int)pv->type); } /* @@ -194,7 +201,8 @@ __json_struct_size(WT_SESSION_IMPL *session, const void *buffer, needcr = true; WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p))); WT_RET(__pack_name_next(&packname, &name)); - result += __json_unpack_put(session, &pv, NULL, 0, &name); + WT_RET( + __json_unpack_put(session, &pv, NULL, 0, &name, 
&result)); } if (ret == WT_NOTFOUND) ret = 0; @@ -243,8 +251,9 @@ __json_struct_unpackv(WT_SESSION_IMPL *session, needcr = true; WT_RET(__unpack_read(session, &pv, &p, (size_t)(end - p))); WT_RET(__pack_name_next(&packname, &name)); - jsize = __json_unpack_put(session, - (u_char *)&pv, jbuf, jbufsize, &name); + jsize = 0; + WT_RET(__json_unpack_put(session, + (u_char *)&pv, jbuf, jbufsize, &name, &jsize)); WT_ASSERT(session, jsize <= jbufsize); jbuf += jsize; jbufsize -= jsize; diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c index c5ccdb1b649..0bff642370d 100644 --- a/src/cursor/cur_stat.c +++ b/src/cursor/cur_stat.c @@ -477,8 +477,8 @@ __curstat_join_desc(WT_CURSOR_STAT *cst, int slot, const char **resultp) len = strlen("join: ") + strlen(sgrp->desc_prefix) + strlen(static_desc) + 1; WT_RET(__wt_realloc(session, NULL, len, &cst->desc_buf)); - snprintf(cst->desc_buf, len, "join: %s%s", sgrp->desc_prefix, - static_desc); + WT_RET(__wt_snprintf( + cst->desc_buf, len, "join: %s%s", sgrp->desc_prefix, static_desc)); *resultp = cst->desc_buf; return (0); } diff --git a/src/include/extern_posix.h b/src/include/extern_posix.h index fed7835ada1..57d94e392d1 100644 --- a/src/include/extern_posix.h +++ b/src/include/extern_posix.h @@ -24,8 +24,9 @@ extern bool __wt_has_priv(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden") extern void __wt_stream_set_line_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_stream_set_no_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void 
*arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_yield(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); diff --git a/src/include/extern_win.h b/src/include/extern_win.h index 0bfc821c7a6..43127a0c79f 100644 --- a/src/include/extern_win.h +++ b/src/include/extern_win.h @@ -22,9 +22,10 @@ extern bool __wt_has_priv(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden") extern void __wt_stream_set_line_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_stream_set_no_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_thread_id(char *buf, size_t buflen) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_to_utf16_string( WT_SESSION_IMPL *session, const char*utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_to_utf8_string( WT_SESSION_IMPL *session, const wchar_t*wide, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/misc.i b/src/include/misc.i index d5692a3f9cf..7040886cf82 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -86,3 +86,94 @@ __wt_verbose(WT_SESSION_IMPL *session, int flag, const char *fmt, ...) WT_UNUSED(fmt); #endif } + +/* + * __wt_snprintf -- + * snprintf convenience function, ignoring the returned size. + */ +static inline int +__wt_snprintf(char *buf, size_t size, const char *fmt, ...) + WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) +{ + WT_DECL_RET; + size_t len; + va_list ap; + + len = 0; + + va_start(ap, fmt); + ret = __wt_vsnprintf_len_incr(buf, size, &len, fmt, ap); + va_end(ap); + WT_RET(ret); + + /* It's an error if the buffer couldn't hold everything. */ + return (len >= size ? ERANGE : 0); +} + +/* + * __wt_vsnprintf -- + * vsnprintf convenience function, ignoring the returned size. + */ +static inline int +__wt_vsnprintf(char *buf, size_t size, const char *fmt, va_list ap) +{ + size_t len; + + len = 0; + + WT_RET(__wt_vsnprintf_len_incr(buf, size, &len, fmt, ap)); + + /* It's an error if the buffer couldn't hold everything. */ + return (len >= size ? ERANGE : 0); +} + +/* + * __wt_snprintf_len_set -- + * snprintf convenience function, setting the returned size. 
+ */ +static inline int +__wt_snprintf_len_set( + char *buf, size_t size, size_t *retsizep, const char *fmt, ...) + WT_GCC_FUNC_ATTRIBUTE((format (printf, 4, 5))) +{ + WT_DECL_RET; + va_list ap; + + *retsizep = 0; + + va_start(ap, fmt); + ret = __wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap); + va_end(ap); + return (ret); +} + +/* + * __wt_vsnprintf_len_set -- + * vsnprintf convenience function, setting the returned size. + */ +static inline int +__wt_vsnprintf_len_set( + char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) +{ + *retsizep = 0; + + return (__wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap)); +} + +/* + * __wt_snprintf_len_incr -- + * snprintf convenience function, incrementing the returned size. + */ +static inline int +__wt_snprintf_len_incr( + char *buf, size_t size, size_t *retsizep, const char *fmt, ...) + WT_GCC_FUNC_ATTRIBUTE((format (printf, 4, 5))) +{ + WT_DECL_RET; + va_list ap; + + va_start(ap, fmt); + ret = __wt_vsnprintf_len_incr(buf, size, retsizep, fmt, ap); + va_end(ap); + return (ret); +} diff --git a/src/include/os_windows.h b/src/include/os_windows.h index 65938ac9f17..c1e5f788dc6 100644 --- a/src/include/os_windows.h +++ b/src/include/os_windows.h @@ -43,16 +43,6 @@ typedef uint32_t u_int; typedef unsigned char u_char; typedef uint64_t u_long; -/* <= VS 2013 is not C99 compat */ -#if _MSC_VER < 1900 -#define snprintf _wt_snprintf - -_Check_return_opt_ int __cdecl _wt_snprintf( - _Out_writes_(_MaxCount) char * _DstBuf, - _In_ size_t _MaxCount, - _In_z_ _Printf_format_string_ const char * _Format, ...); -#endif - /* * Windows does have ssize_t * Python headers declare also though so we need to guard it @@ -61,18 +51,6 @@ _Check_return_opt_ int __cdecl _wt_snprintf( typedef int ssize_t; #endif -/* - * Provide a custom version of vsnprintf that returns the - * needed buffer length instead of -1 on truncation - */ -#define vsnprintf _wt_vsnprintf - -_Check_return_opt_ int __cdecl _wt_vsnprintf( - 
_Out_writes_(_MaxCount) char * _DstBuf, - _In_ size_t _MaxCount, - _In_z_ _Printf_format_string_ const char * _Format, - va_list _ArgList); - /* Provide a custom version of localtime_r */ struct tm *localtime_r(const time_t* timer, struct tm* result); diff --git a/src/include/packing.i b/src/include/packing.i index 6b4bcd49e04..0eadb2f2027 100644 --- a/src/include/packing.i +++ b/src/include/packing.i @@ -104,8 +104,8 @@ __pack_name_next(WT_PACK_NAME *pn, WT_CONFIG_ITEM *name) WT_CONFIG_ITEM ignore; if (pn->genname) { - (void)snprintf(pn->buf, sizeof(pn->buf), - (pn->iskey ? "key%d" : "value%d"), pn->count); + WT_RET(__wt_snprintf(pn->buf, sizeof(pn->buf), + (pn->iskey ? "key%d" : "value%d"), pn->count)); WT_CLEAR(*name); name->str = pn->buf; name->len = strlen(pn->buf); diff --git a/src/log/log.c b/src/log/log.c index 1a27120710b..5b24250fffc 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -2246,8 +2246,10 @@ __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap) return (0); va_copy(ap_copy, ap); - len = (size_t)vsnprintf(NULL, 0, fmt, ap_copy) + 1; + len = 1; + ret = __wt_vsnprintf_len_incr(NULL, 0, &len, fmt, ap_copy); va_end(ap_copy); + WT_RET(ret); WT_RET( __wt_logrec_alloc(session, sizeof(WT_LOG_RECORD) + len, &logrec)); @@ -2264,7 +2266,8 @@ __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap) rec_fmt, rectype)); logrec->size += (uint32_t)header_size; - (void)vsnprintf((char *)logrec->data + logrec->size, len, fmt, ap); + WT_ERR(__wt_vsnprintf( + (char *)logrec->data + logrec->size, len, fmt, ap)); __wt_verbose(session, WT_VERB_LOG, "log_printf: %s", (char *)logrec->data + logrec->size); diff --git a/src/lsm/lsm_stat.c b/src/lsm/lsm_stat.c index ed760b6d5f3..411655878af 100644 --- a/src/lsm/lsm_stat.c +++ b/src/lsm/lsm_stat.c @@ -38,13 +38,13 @@ __curstat_lsm_init( /* Propagate all, fast and/or clear to the cursors we open. 
*/ if (cst->flags != 0) { - (void)snprintf(config, sizeof(config), + WT_ERR(__wt_snprintf(config, sizeof(config), "statistics=(%s%s%s%s)", F_ISSET(cst, WT_STAT_TYPE_ALL) ? "all," : "", F_ISSET(cst, WT_STAT_CLEAR) ? "clear," : "", !F_ISSET(cst, WT_STAT_TYPE_ALL) && F_ISSET(cst, WT_STAT_TYPE_FAST) ? "fast," : "", - F_ISSET(cst, WT_STAT_TYPE_SIZE) ? "size," : ""); + F_ISSET(cst, WT_STAT_TYPE_SIZE) ? "size," : "")); cfg[1] = disk_cfg[1] = config; } diff --git a/src/os_common/filename.c b/src/os_common/filename.c index 5aeb64bb51e..f803144a3fb 100644 --- a/src/os_common/filename.c +++ b/src/os_common/filename.c @@ -43,8 +43,8 @@ __wt_nfilename( else { len = strlen(S2C(session)->home) + 1 + namelen + 1; WT_RET(__wt_calloc(session, 1, len, &buf)); - snprintf(buf, len, "%s%s%.*s", S2C(session)->home, - __wt_path_separator(), (int)namelen, name); + WT_RET(__wt_snprintf(buf, len, "%s%s%.*s", S2C(session)->home, + __wt_path_separator(), (int)namelen, name)); *path = buf; } diff --git a/src/os_common/os_errno.c b/src/os_common/os_errno.c index a8e56b7f1aa..7ac89536e79 100644 --- a/src/os_common/os_errno.c +++ b/src/os_common/os_errno.c @@ -44,7 +44,7 @@ __wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen) * Fallback to a generic message. 
*/ if (session == NULL && - snprintf(errbuf, errlen, "error return: %d", error) > 0) + __wt_snprintf(errbuf, errlen, "error return: %d", error) == 0) return (errbuf); if (session != NULL && __wt_buf_fmt( session, &session->err, "error return: %d", error) == 0) diff --git a/src/os_common/os_fstream.c b/src/os_common/os_fstream.c index 5a368ea75e6..744da732d84 100644 --- a/src/os_common/os_fstream.c +++ b/src/os_common/os_fstream.c @@ -144,7 +144,7 @@ __fstream_printf( p = (char *)((uint8_t *)buf->mem + buf->size); WT_ASSERT(session, buf->memsize >= buf->size); space = buf->memsize - buf->size; - len = (size_t)vsnprintf(p, space, fmt, ap_copy); + WT_RET(__wt_vsnprintf_len_set(p, space, &len, fmt, ap_copy)); va_end(ap_copy); if (len < space) { diff --git a/src/os_posix/os_snprintf.c b/src/os_posix/os_snprintf.c new file mode 100644 index 00000000000..390e2e0334a --- /dev/null +++ b/src/os_posix/os_snprintf.c @@ -0,0 +1,27 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_vsnprintf_len_incr -- + * POSIX vsnprintf convenience function, incrementing the returned size. + */ +int +__wt_vsnprintf_len_incr( + char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) + WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) +{ + WT_DECL_RET; + + if ((ret = vsnprintf(buf, size, fmt, ap)) >= 0) { + *retsizep += (size_t)ret; + return (0); + } + return (__wt_errno()); +} diff --git a/src/os_posix/os_thread.c b/src/os_posix/os_thread.c index 9bf36cc2686..85d43f10a33 100644 --- a/src/os_posix/os_thread.c +++ b/src/os_posix/os_thread.c @@ -45,7 +45,7 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) * __wt_thread_id -- * Fill in a printable version of the process and thread IDs. 
*/ -void +int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) { @@ -57,10 +57,10 @@ __wt_thread_id(char *buf, size_t buflen) */ self = pthread_self(); #ifdef __sun - (void)snprintf(buf, buflen, - "%" PRIuMAX ":%u", (uintmax_t)getpid(), self); + return (__wt_snprintf(buf, buflen, + "%" PRIuMAX ":%u", (uintmax_t)getpid(), self)); #else - (void)snprintf(buf, buflen, - "%" PRIuMAX ":%p", (uintmax_t)getpid(), (void *)self); + return (__wt_snprintf(buf, buflen, + "%" PRIuMAX ":%p", (uintmax_t)getpid(), (void *)self)); #endif } diff --git a/src/os_win/os_snprintf.c b/src/os_win/os_snprintf.c index a6056ff9342..f3025b12a60 100644 --- a/src/os_win/os_snprintf.c +++ b/src/os_win/os_snprintf.c @@ -8,17 +8,47 @@ #include "wt_internal.h" -_Check_return_opt_ int __cdecl _wt_snprintf( - _Out_writes_(_MaxCount) char * _DstBuf, - _In_ size_t _MaxCount, - _In_z_ _Printf_format_string_ const char * _Format, ...) +/* + * __wt_vsnprintf_len_incr -- + * POSIX vsnprintf convenience function, incrementing the returned size. + */ +int +__wt_vsnprintf_len_incr( + char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) { - va_list args; - WT_DECL_RET; + int len; + + /* + * WiredTiger calls with length 0 to get the needed buffer size. Call + * the count only version in this case, _vsnprintf_s will invoke the + * invalid parameter handler if count is less than or equal to zero. + */ + if (size == 0) { + *retsizep += (size_t)_vscprintf(fmt, ap); + return (0); + } + + /* + * Additionally, the invalid parameter handler is invoked if buffer or + * format is a NULL pointer. + */ + if (buf == NULL || fmt == NULL) + return (EINVAL); + + /* + * If the storage required to store the data and a terminating null + * exceeds size, the invalid parameter handler is invoked, unless + * count is _TRUNCATE, in which case as much of the string as will + * fit in the buffer is written and -1 returned. 
+ */ + if ((len = _vsnprintf_s(buf, size, _TRUNCATE, fmt, ap)) >= 0) { + *retsizep += (size_t)len; + return (0); + } - va_start(args, _Format); - ret = _wt_vsnprintf(_DstBuf, _MaxCount, _Format, args); - va_end(args); + /* Return the buffer size required. */ + if (len == -1) + *retsizep += (size_t)_vscprintf(fmt, ap); - return (ret); + return (0); } diff --git a/src/os_win/os_thread.c b/src/os_win/os_thread.c index a34dff776b6..7442fb08a36 100644 --- a/src/os_win/os_thread.c +++ b/src/os_win/os_thread.c @@ -58,10 +58,10 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) * __wt_thread_id -- * Fill in a printable version of the process and thread IDs. */ -void +int __wt_thread_id(char *buf, size_t buflen) { - (void)snprintf(buf, buflen, + return (__wt_snprintf(buf, buflen, "%" PRIu64 ":%" PRIu64, - (uint64_t)GetCurrentProcessId(), (uint64_t)GetCurrentThreadId); + (uint64_t)GetCurrentProcessId(), (uint64_t)GetCurrentThreadId)); } diff --git a/src/os_win/os_vsnprintf.c b/src/os_win/os_vsnprintf.c deleted file mode 100644 index 63f96e79d5b..00000000000 --- a/src/os_win/os_vsnprintf.c +++ /dev/null @@ -1,41 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -_Check_return_opt_ int __cdecl _wt_vsnprintf( - _Out_writes_(_MaxCount) char * _DstBuf, - _In_ size_t _MaxCount, - _In_z_ _Printf_format_string_ const char * _Format, - va_list _ArgList) -{ - int len; - - /* - * WiredTiger will call with length 0 to get the needed buffer size - * We call the count only version in this case since vsnprintf_s assumes - * length is greater than zero or else it triggers the invalid_parameter - * handler. 
- */ - if (_MaxCount == 0) { - return _vscprintf(_Format, _ArgList); - } - - len = (size_t)_vsnprintf_s( - _DstBuf, _MaxCount, _TRUNCATE, _Format, _ArgList); - - /* - * The MSVC implementation returns -1 on truncation instead of what - * it would have written. We could let callers iteratively grow the - * buffer, or just ask us how big a buffer they would like. - */ - if (len == -1) - len = _vscprintf(_Format, _ArgList) + 1; - - return (len); -} diff --git a/src/schema/schema_create.c b/src/schema/schema_create.c index a77ca51f9d2..0677fa711a5 100644 --- a/src/schema/schema_create.c +++ b/src/schema/schema_create.c @@ -601,7 +601,8 @@ __create_table(WT_SESSION_IMPL *session, if (ncolgroups == 0) { cgsize = strlen("colgroup:") + strlen(tablename) + 1; WT_ERR(__wt_calloc_def(session, cgsize, &cgname)); - snprintf(cgname, cgsize, "colgroup:%s", tablename); + WT_ERR(__wt_snprintf( + cgname, cgsize, "colgroup:%s", tablename)); WT_ERR(__create_colgroup( session, cgname, exclusive, config)); } diff --git a/src/support/err.c b/src/support/err.c index 369997d38c0..57efde72b23 100644 --- a/src/support/err.c +++ b/src/support/err.c @@ -102,9 +102,10 @@ __handler_failure(WT_SESSION_IMPL *session, */ char s[256]; - (void)snprintf(s, sizeof(s), + if (__wt_snprintf(s, sizeof(s), "application %s event handler failed: %s", - which, __wt_strerror(session, error, NULL, 0)); + which, __wt_strerror(session, error, NULL, 0)) != 0) + return; /* * Use the error handler to report the failure, unless it was the error @@ -148,6 +149,23 @@ __wt_event_handler_set(WT_SESSION_IMPL *session, WT_EVENT_HANDLER *handler) session->event_handler = handler; } +#define WT_ERROR_APPEND(p, remain, ...) do { \ + size_t __len; \ + WT_ERR(__wt_snprintf_len_set(p, remain, &__len, __VA_ARGS__)); \ + if (__len > remain) \ + __len = remain; \ + p += __len; \ + remain -= __len; \ +} while (0) +#define WT_ERROR_APPEND_AP(p, remain, ...) 
do { \ + size_t __len; \ + WT_ERR(__wt_vsnprintf_len_set(p, remain, &__len, __VA_ARGS__)); \ + if (__len > remain) \ + __len = remain; \ + p += __len; \ + remain -= __len; \ +} while (0) + /* * __wt_eventv -- * Report a message to an event handler. @@ -161,9 +179,9 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, WT_DECL_RET; WT_SESSION *wt_session; struct timespec ts; - size_t len, remain, wlen; + size_t len, remain; const char *err, *prefix; - char *end, *p, tid[128]; + char *p, tid[128]; /* * We're using a stack buffer because we want error messages no matter @@ -174,6 +192,8 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, * Buffer placed at the end of the stack in case snprintf overflows. */ char s[2048]; + p = s; + remain = sizeof(s); /* * !!! @@ -185,24 +205,8 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, * first session, but if the allocation of the first session fails, for * example, we can end up here without a session.) */ - if (session == NULL) { - if (fprintf(stderr, - "WiredTiger Error%s%s: ", - error == 0 ? "" : ": ", - error == 0 ? "" : - __wt_strerror(session, error, NULL, 0)) < 0) - ret = EIO; - if (vfprintf(stderr, fmt, ap) < 0) - ret = EIO; - if (fprintf(stderr, "\n") < 0) - ret = EIO; - if (fflush(stderr) != 0) - ret = EIO; - return (ret); - } - - p = s; - end = s + sizeof(s); + if (session == NULL) + goto err; /* * We have several prefixes for the error message: a timestamp and the @@ -211,42 +215,24 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, * followed by a colon. */ __wt_epoch(session, &ts); - __wt_thread_id(tid, sizeof(tid)); - remain = WT_PTRDIFF(end, p); - wlen = (size_t)snprintf(p, remain, "[%" PRIuMAX ":%" PRIuMAX "][%s]", + WT_ERR(__wt_thread_id(tid, sizeof(tid))); + WT_ERROR_APPEND(p, remain, + "[%" PRIuMAX ":%" PRIuMAX "][%s]", (uintmax_t)ts.tv_sec, (uintmax_t)ts.tv_nsec / WT_THOUSAND, tid); - p = wlen >= remain ? 
end : p + wlen; - if ((prefix = S2C(session)->error_prefix) != NULL) { - remain = WT_PTRDIFF(end, p); - wlen = (size_t)snprintf(p, remain, ", %s", prefix); - p = wlen >= remain ? end : p + wlen; - } + if ((prefix = S2C(session)->error_prefix) != NULL) + WT_ERROR_APPEND(p, remain, ", %s", prefix); prefix = session->dhandle == NULL ? NULL : session->dhandle->name; - if (prefix != NULL) { - remain = WT_PTRDIFF(end, p); - wlen = (size_t)snprintf(p, remain, ", %s", prefix); - p = wlen >= remain ? end : p + wlen; - } - if ((prefix = session->name) != NULL) { - remain = WT_PTRDIFF(end, p); - wlen = (size_t)snprintf(p, remain, ", %s", prefix); - p = wlen >= remain ? end : p + wlen; - } - remain = WT_PTRDIFF(end, p); - wlen = (size_t)snprintf(p, remain, ": "); - p = wlen >= remain ? end : p + wlen; - - if (file_name != NULL) { - remain = WT_PTRDIFF(end, p); - wlen = (size_t) - snprintf(p, remain, "%s, %d: ", file_name, line_number); - p = wlen >= remain ? end : p + wlen; - } + if (prefix != NULL) + WT_ERROR_APPEND(p, remain, ", %s", prefix); + if ((prefix = session->name) != NULL) + WT_ERROR_APPEND(p, remain, ", %s", prefix); + WT_ERROR_APPEND(p, remain, ": "); + + if (file_name != NULL) + WT_ERROR_APPEND(p, remain, "%s, %d: ", file_name, line_number); - remain = WT_PTRDIFF(end, p); - wlen = (size_t)vsnprintf(p, remain, fmt, ap); - p = wlen >= remain ? 
end : p + wlen; + WT_ERROR_APPEND_AP(p, remain, fmt, ap); if (error != 0) { /* @@ -261,10 +247,8 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, */ err = __wt_strerror(session, error, NULL, 0); len = strlen(err); - if (WT_PTRDIFF(p, s) < len || strcmp(p - len, err) != 0) { - remain = WT_PTRDIFF(end, p); - (void)snprintf(p, remain, ": %s", err); - } + if (WT_PTRDIFF(p, s) < len || strcmp(p - len, err) != 0) + WT_ERROR_APPEND(p, remain, ": %s", err); } /* @@ -279,7 +263,7 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, * * If an application-specified error message handler fails, complain * using the default error handler. If the default error handler fails, - * there's nothing to do. + * fallback to stderr. */ wt_session = (WT_SESSION *)session; handler = session->event_handler; @@ -293,6 +277,21 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, __handler_failure(session, ret, "error", true); } + if (ret != 0) { +err: if (fprintf(stderr, + "WiredTiger Error%s%s: ", + error == 0 ? "" : ": ", + error == 0 ? "" : + __wt_strerror(session, error, NULL, 0)) < 0) + WT_TRET(EIO); + if (vfprintf(stderr, fmt, ap) < 0) + WT_TRET(EIO); + if (fprintf(stderr, "\n") < 0) + WT_TRET(EIO); + if (fflush(stderr) != 0) + WT_TRET(EIO); + } + return (ret); } @@ -376,7 +375,7 @@ info_msg(WT_SESSION_IMPL *session, const char *fmt, va_list ap) */ char s[2048]; - (void)vsnprintf(s, sizeof(s), fmt, ap); + WT_RET(__wt_vsnprintf(s, sizeof(s), fmt, ap)); wt_session = (WT_SESSION *)session; handler = session->event_handler; diff --git a/src/support/scratch.c b/src/support/scratch.c index 69987ebc852..485cea90e89 100644 --- a/src/support/scratch.c +++ b/src/support/scratch.c @@ -69,13 +69,16 @@ int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) 
WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) { + WT_DECL_RET; va_list ap; size_t len; for (;;) { va_start(ap, fmt); - len = (size_t)vsnprintf(buf->mem, buf->memsize, fmt, ap); + ret = __wt_vsnprintf_len_set( + buf->mem, buf->memsize, &len, fmt, ap); va_end(ap); + WT_RET(ret); /* Check if there was enough space. */ if (len < buf->memsize) { @@ -100,6 +103,7 @@ int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) { + WT_DECL_RET; va_list ap; size_t len, space; char *p; @@ -117,8 +121,9 @@ __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) p = (char *)((uint8_t *)buf->mem + buf->size); WT_ASSERT(session, buf->memsize >= buf->size); space = buf->memsize - buf->size; - len = (size_t)vsnprintf(p, space, fmt, ap); + ret = __wt_vsnprintf_len_set(p, space, &len, fmt, ap); va_end(ap); + WT_RET(ret); /* Check if there was enough space. */ if (len < space) { diff --git a/src/utilities/util_backup.c b/src/utilities/util_backup.c index 5dc9671fb45..f1b31f7621a 100644 --- a/src/utilities/util_backup.c +++ b/src/utilities/util_backup.c @@ -109,9 +109,14 @@ copy(WT_SESSION *session, const char *directory, const char *name) /* Build the target pathname. 
*/ len = strlen(directory) + strlen(name) + 2; - if ((to = malloc(len)) == NULL) - goto memerr; - (void)snprintf(to, len, "%s/%s", directory, name); + if ((to = malloc(len)) == NULL) { + fprintf(stderr, "%s: %s\n", progname, strerror(errno)); + return (1); + } + if ((ret = __wt_snprintf(to, len, "%s/%s", directory, name)) != 0) { + fprintf(stderr, "%s: %s\n", progname, strerror(ret)); + goto err; + } if (verbose && printf("Backing up %s/%s to %s\n", home, name, to) < 0) { fprintf(stderr, "%s: %s\n", progname, strerror(EIO)); @@ -126,11 +131,7 @@ copy(WT_SESSION *session, const char *directory, const char *name) fprintf(stderr, "%s/%s to %s: backup copy: %s\n", home, name, to, session->strerror(session, ret)); - if (0) { -memerr: fprintf(stderr, "%s: %s\n", progname, strerror(errno)); - } err: free(to); - return (ret); } diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c index 947fa7bf9ef..238e2757099 100644 --- a/src/utilities/util_dump.c +++ b/src/utilities/util_dump.c @@ -259,14 +259,15 @@ dump_add_config(WT_SESSION *session, char **bufp, size_t *leftp, const char *fmt, ...) 
WT_GCC_FUNC_ATTRIBUTE((format (printf, 4, 5))) { - int n; + WT_DECL_RET; + size_t n; va_list ap; va_start(ap, fmt); - n = vsnprintf(*bufp, *leftp, fmt, ap); + ret = __wt_vsnprintf_len_set(*bufp, *leftp, &n, fmt, ap); va_end(ap); - if (n < 0) - return (util_err(session, EINVAL, NULL)); + if (ret != 0) + return (util_err(session, ret, NULL)); *bufp += n; *leftp -= (size_t)n; return (0); @@ -435,9 +436,9 @@ dump_table_parts_config(WT_SESSION *session, WT_CURSOR *cursor, len = strlen(entry) + strlen(name) + 1; if ((uriprefix = malloc(len)) == NULL) - return util_err(session, errno, NULL); - - snprintf(uriprefix, len, "%s%s", entry, name); + return (util_err(session, errno, NULL)); + if ((ret = __wt_snprintf(uriprefix, len, "%s%s", entry, name)) != 0) + return (util_err(session, ret, NULL)); /* * Search the file looking for column group and index key/value pairs: diff --git a/src/utilities/util_load.c b/src/utilities/util_load.c index d31fa4c9d08..d2f00402217 100644 --- a/src/utilities/util_load.c +++ b/src/utilities/util_load.c @@ -120,10 +120,12 @@ load_dump(WT_SESSION *session) goto err; /* Open the insert cursor. */ - (void)snprintf(config, sizeof(config), + if ((ret = __wt_snprintf(config, sizeof(config), "dump=%s%s%s", hex ? "hex" : "print", - append ? ",append" : "", no_overwrite ? ",overwrite=false" : ""); + append ? ",append" : "", + no_overwrite ? ",overwrite=false" : "")) != 0) + return (util_err(session, ret, NULL)); if ((ret = session->open_cursor( session, uri, NULL, config, &cursor)) != 0) { ret = util_err(session, ret, "%s: session.open_cursor", uri); @@ -472,6 +474,7 @@ config_update(WT_SESSION *session, char **list) static int config_rename(WT_SESSION *session, char **urip, const char *name) { + WT_DECL_RET; size_t len; char *buf, *p; @@ -490,7 +493,9 @@ config_rename(WT_SESSION *session, char **urip, const char *name) } *p = '\0'; p = strchr(p + 1, ':'); - snprintf(buf, len, "%s:%s%s", *urip, name, p == NULL ? 
"" : p); + if ((ret = __wt_snprintf( + buf, len, "%s:%s%s", *urip, name, p == NULL ? "" : p)) != 0) + return (util_err(session, ret, NULL)); *urip = buf; return (0); diff --git a/src/utilities/util_load_json.c b/src/utilities/util_load_json.c index 1189d49a483..af5c2576b26 100644 --- a/src/utilities/util_load_json.c +++ b/src/utilities/util_load_json.c @@ -145,6 +145,7 @@ static int json_kvraw_append(WT_SESSION *session, JSON_INPUT_STATE *ins, const char *str, size_t len) { + WT_DECL_RET; size_t needsize; char *tmp; @@ -152,7 +153,9 @@ json_kvraw_append(WT_SESSION *session, needsize = strlen(ins->kvraw) + len + 2; if ((tmp = malloc(needsize)) == NULL) return (util_err(session, errno, NULL)); - snprintf(tmp, needsize, "%s %.*s", ins->kvraw, (int)len, str); + if ((ret = __wt_snprintf( + tmp, needsize, "%s %.*s", ins->kvraw, (int)len, str)) != 0) + return (util_err(session, ret, NULL)); free(ins->kvraw); ins->kvraw = tmp; } @@ -181,7 +184,7 @@ json_strdup(WT_SESSION *session, JSON_INPUT_STATE *ins, char **resultp) goto err; } resultlen += 1; - if ((result = (char *)malloc((size_t)resultlen)) == NULL) { + if ((result = malloc((size_t)resultlen)) == NULL) { ret = util_err(session, errno, NULL); goto err; } @@ -236,10 +239,13 @@ json_data(WT_SESSION *session, goto err; uri = clp->list[0]; - (void)snprintf(config, sizeof(config), + if ((ret = __wt_snprintf(config, sizeof(config), "dump=json%s%s", LF_ISSET(LOAD_JSON_APPEND) ? ",append" : "", - LF_ISSET(LOAD_JSON_NO_OVERWRITE) ? ",overwrite=false" : ""); + LF_ISSET(LOAD_JSON_NO_OVERWRITE) ? 
",overwrite=false" : "")) != 0) { + ret = util_err(session, ret, NULL); + goto err; + } if ((ret = session->open_cursor( session, uri, NULL, config, &cursor)) != 0) { ret = util_err(session, ret, "%s: session.open_cursor", uri); @@ -256,7 +262,7 @@ json_data(WT_SESSION *session, nfield = 0; JSON_EXPECT(session, ins, '{'); if (ins->kvraw == NULL) { - if ((ins->kvraw = (char *)malloc(1)) == NULL) { + if ((ins->kvraw = malloc(1)) == NULL) { ret = util_err(session, errno, NULL); goto err; } @@ -358,8 +364,11 @@ json_top_level(WT_SESSION *session, JSON_INPUT_STATE *ins, uint32_t flags) while (json_peek(session, ins) == 's') { JSON_EXPECT(session, ins, 's'); tableuri = realloc(tableuri, ins->toklen); - snprintf(tableuri, ins->toklen, "%.*s", - (int)(ins->toklen - 2), ins->tokstart + 1); + if ((ret = __wt_snprintf(tableuri, ins->toklen, + "%.*s", (int)(ins->toklen - 2), ins->tokstart + 1)) != 0) { + ret = util_err(session, ret, NULL); + goto err; + } JSON_EXPECT(session, ins, ':'); if (!hasversion) { if (strcmp(tableuri, DUMP_JSON_VERSION_MARKER) != 0) { diff --git a/src/utilities/util_main.c b/src/utilities/util_main.c index 68e3b0f1bc5..2b4ef36081a 100644 --- a/src/utilities/util_main.c +++ b/src/utilities/util_main.c @@ -257,9 +257,13 @@ main(int argc, char *argv[]) (void)util_err(NULL, errno, NULL); goto err; } - (void)snprintf(p, len, "%s,%s,%s%s%s%s", + if ((ret = __wt_snprintf(p, len, "%s,%s,%s%s%s%s", config == NULL ? "" : config, - cmd_config == NULL ? "" : cmd_config, rec_config, p1, p2, p3); + cmd_config == NULL ? "" : cmd_config, + rec_config, p1, p2, p3)) != 0) { + (void)util_err(NULL, ret, NULL); + goto err; + } config = p; /* Open the database and a session. */ @@ -298,6 +302,7 @@ done: char * util_uri(WT_SESSION *session, const char *s, const char *type) { + WT_DECL_RET; size_t len; char *name; @@ -321,8 +326,12 @@ util_uri(WT_SESSION *session, const char *s, const char *type) * the default type for the operation. 
*/ if (strchr(s, ':') != NULL) - snprintf(name, len, "%s", s); + ret = __wt_snprintf(name, len, "%s", s); else - snprintf(name, len, "%s:%s", type, s); + ret = __wt_snprintf(name, len, "%s:%s", type, s); + if (ret != 0) { + (void)util_err(session, ret, NULL); + return (NULL); + } return (name); } diff --git a/src/utilities/util_misc.c b/src/utilities/util_misc.c index 0905bfa97be..e26185a0096 100644 --- a/src/utilities/util_misc.c +++ b/src/utilities/util_misc.c @@ -140,7 +140,10 @@ util_flush(WT_SESSION *session, const char *uri) if ((buf = malloc(len)) == NULL) return (util_err(session, errno, NULL)); - (void)snprintf(buf, len, "target=(\"%s\")", uri); + if ((ret = __wt_snprintf(buf, len, "target=(\"%s\")", uri)) != 0) { + free(buf); + return (util_err(session, ret, NULL)); + } ret = session->checkpoint(session, buf); free(buf); diff --git a/src/utilities/util_stat.c b/src/utilities/util_stat.c index 1b75d9ea8bf..0692afe2819 100644 --- a/src/utilities/util_stat.c +++ b/src/utilities/util_stat.c @@ -68,7 +68,10 @@ util_stat(WT_SESSION *session, int argc, char *argv[]) fprintf(stderr, "%s: %s\n", progname, strerror(errno)); goto err; } - snprintf(uri, urilen, "statistics:%s", objname); + if ((ret = __wt_snprintf(uri, urilen, "statistics:%s", objname)) != 0) { + fprintf(stderr, "%s: %s\n", progname, strerror(ret)); + goto err; + } if ((ret = session->open_cursor(session, uri, NULL, config, &cursor)) != 0) { diff --git a/src/utilities/util_verify.c b/src/utilities/util_verify.c index d0587fcfc8c..ace1be7a5de 100644 --- a/src/utilities/util_verify.c +++ b/src/utilities/util_verify.c @@ -72,7 +72,7 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) ret = util_err(session, errno, NULL); goto err; } - snprintf(config, size, + if ((ret = __wt_snprintf(config, size, "%s%s%s%s%s%s%s", dump_address ? "dump_address," : "", dump_blocks ? "dump_blocks," : "", @@ -80,7 +80,10 @@ util_verify(WT_SESSION *session, int argc, char *argv[]) dump_offsets != NULL ? 
"dump_offsets=[" : "", dump_offsets != NULL ? dump_offsets : "", dump_offsets != NULL ? "]," : "", - dump_pages ? "dump_pages," : ""); + dump_pages ? "dump_pages," : "")) != 0) { + (void)util_err(session, ret, NULL); + goto err; + } } if ((ret = session->verify(session, uri, config)) != 0) (void)util_err(session, ret, "session.verify: %s", uri); diff --git a/src/utilities/util_write.c b/src/utilities/util_write.c index b931fad064d..1d3e6937f8d 100644 --- a/src/utilities/util_write.c +++ b/src/utilities/util_write.c @@ -54,8 +54,12 @@ util_write(WT_SESSION *session, int argc, char *argv[]) * Open the object; free allocated memory immediately to simplify * future error handling. */ - (void)snprintf(config, sizeof(config), "%s,%s", - append ? "append=true" : "", overwrite ? "overwrite=true" : ""); + if ((ret = __wt_snprintf(config, sizeof(config), "%s,%s", + append ? "append=true" : "", + overwrite ? "overwrite=true" : "")) != 0) { + free(uri); + return (util_err(session, ret, NULL)); + } if ((ret = session->open_cursor(session, uri, NULL, config, &cursor)) != 0) (void)util_err(session, ret, "%s: session.open_cursor", uri); diff --git a/test/bloom/test_bloom.c b/test/bloom/test_bloom.c index bef509e01d8..b6299bbbadc 100644 --- a/test/bloom/test_bloom.c +++ b/test/bloom/test_bloom.c @@ -121,9 +121,9 @@ setup(void) * Open configuration -- put command line configuration options at the * end so they can override "standard" configuration. */ - snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "create,error_prefix=\"%s\",cache_size=%" PRIu32 "MB,%s", - progname, g.c_cache, g.config_open == NULL ? "" : g.config_open); + progname, g.c_cache, g.config_open == NULL ? 
"" : g.config_open)); testutil_check(wiredtiger_open(NULL, NULL, config, &conn)); diff --git a/test/checkpoint/checkpointer.c b/test/checkpoint/checkpointer.c index ef49a9492ce..84d2765843a 100644 --- a/test/checkpoint/checkpointer.c +++ b/test/checkpoint/checkpointer.c @@ -74,7 +74,7 @@ checkpointer(void *arg) WT_UNUSED(arg); - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); printf("checkpointer thread starting: tid: %s\n", tid); (void)real_checkpointer(); @@ -107,8 +107,9 @@ real_checkpointer(void) "WiredTigerCheckpoint", strlen("WiredTigerCheckpoint")) == 0) checkpoint_config = NULL; else { + testutil_check(__wt_snprintf( + _buf, sizeof(_buf), "name=%s", g.checkpoint_name)); checkpoint_config = _buf; - snprintf(checkpoint_config, 128, "name=%s", g.checkpoint_name); } while (g.running) { /* Execute a checkpoint */ @@ -147,7 +148,8 @@ verify_checkpoint(WT_SESSION *session) ret = t_ret = 0; key_count = 0; - snprintf(ckpt, 128, "checkpoint=%s", g.checkpoint_name); + testutil_check(__wt_snprintf( + ckpt, sizeof(ckpt), "checkpoint=%s", g.checkpoint_name)); cursors = calloc((size_t)g.ntables, sizeof(*cursors)); if (cursors == NULL) return (log_print_err("verify_checkpoint", ENOMEM, 1)); @@ -159,7 +161,8 @@ verify_checkpoint(WT_SESSION *session) */ if (g.cookies[i].type == LSM) continue; - snprintf(next_uri, 128, "table:__wt%04d", i); + testutil_check(__wt_snprintf( + next_uri, sizeof(next_uri), "table:__wt%04d", i)); if ((ret = session->open_cursor( session, next_uri, NULL, ckpt, &cursors[i])) != 0) { (void)log_print_err( @@ -296,7 +299,8 @@ diagnose_key_error( session = cursor1->session; key1_orig = key2_orig = 0; - snprintf(ckpt, 128, "checkpoint=%s", g.checkpoint_name); + testutil_check(__wt_snprintf( + ckpt, sizeof(ckpt), "checkpoint=%s", g.checkpoint_name)); /* Save the failed keys. 
*/ if (cursor1->get_key(cursor1, &key1_orig) != 0 || @@ -338,7 +342,8 @@ diagnose_key_error( * Now try opening new cursors on the checkpoints and see if we * get the same missing key via searching. */ - snprintf(next_uri, 128, "table:__wt%04d", index1); + testutil_check(__wt_snprintf( + next_uri, sizeof(next_uri), "table:__wt%04d", index1)); if (session->open_cursor(session, next_uri, NULL, ckpt, &c) != 0) return (1); c->set_key(c, key1_orig); @@ -350,7 +355,8 @@ diagnose_key_error( if (c->close(c) != 0) return (1); - snprintf(next_uri, 128, "table:__wt%04d", index2); + testutil_check(__wt_snprintf( + next_uri, sizeof(next_uri), "table:__wt%04d", index2)); if (session->open_cursor(session, next_uri, NULL, ckpt, &c) != 0) return (1); c->set_key(c, key1_orig); @@ -367,7 +373,8 @@ live_check: * Now try opening cursors on the live checkpoint to see if we get the * same missing key via searching. */ - snprintf(next_uri, 128, "table:__wt%04d", index1); + testutil_check(__wt_snprintf( + next_uri, sizeof(next_uri), "table:__wt%04d", index1)); if (session->open_cursor(session, next_uri, NULL, NULL, &c) != 0) return (1); c->set_key(c, key1_orig); @@ -376,7 +383,8 @@ live_check: if (c->close(c) != 0) return (1); - snprintf(next_uri, 128, "table:__wt%04d", index2); + testutil_check(__wt_snprintf( + next_uri, sizeof(next_uri), "table:__wt%04d", index2)); if (session->open_cursor(session, next_uri, NULL, NULL, &c) != 0) return (1); c->set_key(c, key2_orig); diff --git a/test/checkpoint/test_checkpoint.c b/test/checkpoint/test_checkpoint.c index c7132b433d2..e7e1a0b81a5 100644 --- a/test/checkpoint/test_checkpoint.c +++ b/test/checkpoint/test_checkpoint.c @@ -199,11 +199,11 @@ wt_connect(const char *config_open) testutil_make_work_dir(g.home); - snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "create,statistics=(fast),error_prefix=\"%s\",cache_size=1GB%s%s", progname, config_open == NULL ? "" : ",", - config_open == NULL ? 
"" : config_open); + config_open == NULL ? "" : config_open)); if ((ret = wiredtiger_open( g.home, &event_handler, config, &g.conn)) != 0) diff --git a/test/checkpoint/workers.c b/test/checkpoint/workers.c index e4fe7bd1b29..82d1b8685c4 100644 --- a/test/checkpoint/workers.c +++ b/test/checkpoint/workers.c @@ -39,14 +39,12 @@ static int create_table(WT_SESSION *session, COOKIE *cookie) { int ret; - char *p, *end, config[128]; + char config[128]; - p = config; - end = config + sizeof(config); - p += snprintf(p, (size_t)(end - p), - "key_format=%s,value_format=S", cookie->type == COL ? "r" : "q"); - if (cookie->type == LSM) - (void)snprintf(p, (size_t)(end - p), ",type=lsm"); + testutil_check(__wt_snprintf(config, sizeof(config), + "key_format=%s,value_format=S,%s", + cookie->type == COL ? "r" : "q", + cookie->type == LSM ? ",type=lsm" : "")); if ((ret = session->create(session, cookie->uri, config)) != 0) if (ret != EEXIST) @@ -88,8 +86,9 @@ start_workers(table_type type) (table_type)((i % MAX_TABLE_TYPE) + 1); else g.cookies[i].type = type; - (void)snprintf(g.cookies[i].uri, 128, - "%s%04d", URI_BASE, g.cookies[i].id); + testutil_check(__wt_snprintf( + g.cookies[i].uri, sizeof(g.cookies[i].uri), + "%s%04d", URI_BASE, g.cookies[i].id)); /* Should probably be atomic to avoid races. 
*/ if ((ret = create_table(session, &g.cookies[i])) != 0) @@ -132,7 +131,8 @@ worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val) char valuebuf[64]; cursor->set_key(cursor, keyno); - (void)snprintf(valuebuf, sizeof(valuebuf), "%037u", new_val); + testutil_check(__wt_snprintf( + valuebuf, sizeof(valuebuf), "%037u", new_val)); cursor->set_value(cursor, valuebuf); if ((ret = cursor->insert(cursor)) != 0) { if (ret == WT_ROLLBACK) @@ -153,7 +153,7 @@ worker(void *arg) WT_UNUSED(arg); - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); printf("worker thread starting: tid: %s\n", tid); (void)real_worker(); diff --git a/test/csuite/wt1965_col_efficiency/main.c b/test/csuite/wt1965_col_efficiency/main.c index a7235d81b31..e5b73d5e642 100644 --- a/test/csuite/wt1965_col_efficiency/main.c +++ b/test/csuite/wt1965_col_efficiency/main.c @@ -132,7 +132,8 @@ main(int argc, char *argv[]) testutil_check(opts->conn->open_session( opts->conn, NULL, NULL, &session)); - sprintf(table_format, "key_format=r,value_format="); + testutil_check(__wt_snprintf( + table_format, sizeof(table_format), "key_format=r,value_format=")); for (i = 0; i < NR_FIELDS; i++) strcat(table_format, "Q"); diff --git a/test/csuite/wt2246_col_append/main.c b/test/csuite/wt2246_col_append/main.c index 976e2269da6..9876582fffa 100644 --- a/test/csuite/wt2246_col_append/main.c +++ b/test/csuite/wt2246_col_append/main.c @@ -68,8 +68,8 @@ page_init(uint64_t n) else { if (recno % 3 == 0) ++vrecno; - snprintf(buf, - sizeof(buf), "%" PRIu64 " VALUE ------", vrecno); + testutil_check(__wt_snprintf(buf, + sizeof(buf), "%" PRIu64 " VALUE ------", vrecno)); cursor->set_value(cursor, buf); } testutil_check(cursor->insert(cursor)); @@ -112,19 +112,19 @@ main(int argc, char *argv[]) testutil_check(testutil_parse_opts(argc, argv, opts)); testutil_make_work_dir(opts->home); - snprintf(buf, sizeof(buf), + testutil_check(__wt_snprintf(buf, sizeof(buf), "create," "cache_size=%s," 
"eviction=(threads_max=5)," "statistics=(fast)", - opts->table_type == TABLE_FIX ? "500MB" : "2GB"); + opts->table_type == TABLE_FIX ? "500MB" : "2GB")); testutil_check(wiredtiger_open(opts->home, NULL, buf, &opts->conn)); testutil_check( opts->conn->open_session(opts->conn, NULL, NULL, &session)); - snprintf(buf, sizeof(buf), + testutil_check(__wt_snprintf(buf, sizeof(buf), "key_format=r,value_format=%s," "allocation_size=4K,leaf_page_max=64K", - opts->table_type == TABLE_FIX ? "8t" : "S"); + opts->table_type == TABLE_FIX ? "8t" : "S")); testutil_check(session->create(session, opts->uri, buf)); testutil_check(session->close(session, NULL)); diff --git a/test/csuite/wt2323_join_visibility/main.c b/test/csuite/wt2323_join_visibility/main.c index a61f707e008..617490fec4d 100644 --- a/test/csuite/wt2323_join_visibility/main.c +++ b/test/csuite/wt2323_join_visibility/main.c @@ -106,14 +106,18 @@ main(int argc, char *argv[]) tablename = strchr(opts->uri, ':'); testutil_assert(tablename != NULL); tablename++; - snprintf(sharedopts->posturi, sizeof(sharedopts->posturi), - "index:%s:post", tablename); - snprintf(sharedopts->baluri, sizeof(sharedopts->baluri), - "index:%s:bal", tablename); - snprintf(sharedopts->flaguri, sizeof(sharedopts->flaguri), - "index:%s:flag", tablename); - snprintf(sharedopts->joinuri, sizeof(sharedopts->joinuri), - "join:%s", opts->uri); + testutil_check(__wt_snprintf( + sharedopts->posturi, sizeof(sharedopts->posturi), + "index:%s:post", tablename)); + testutil_check(__wt_snprintf( + sharedopts->baluri, sizeof(sharedopts->baluri), + "index:%s:bal", tablename)); + testutil_check(__wt_snprintf( + sharedopts->flaguri, sizeof(sharedopts->flaguri), + "index:%s:flag", tablename)); + testutil_check(__wt_snprintf( + sharedopts->joinuri, sizeof(sharedopts->joinuri), + "join:%s", opts->uri)); testutil_check(wiredtiger_open(opts->home, NULL, "create,cache_size=1G", &opts->conn)); @@ -350,19 +354,21 @@ static void *thread_join(void *arg) 
balcur->set_key(balcur, 0); testutil_check(balcur->search(balcur)); if (sharedopts->bloom) - sprintf(cfg, "compare=lt,strategy=bloom,count=%d", - N_RECORDS); + testutil_check(__wt_snprintf(cfg, sizeof(cfg), + "compare=lt,strategy=bloom,count=%d", N_RECORDS)); else - sprintf(cfg, "compare=lt"); + testutil_check(__wt_snprintf( + cfg, sizeof(cfg), "compare=lt")); testutil_check(session->join(session, joincur, balcur, cfg)); flagcur->set_key(flagcur, 0); testutil_check(flagcur->search(flagcur)); if (sharedopts->bloom) - sprintf(cfg, "compare=eq,strategy=bloom,count=%d", - N_RECORDS); + testutil_check(__wt_snprintf(cfg, sizeof(cfg), + "compare=eq,strategy=bloom,count=%d", N_RECORDS)); else - sprintf(cfg, "compare=eq"); + testutil_check(__wt_snprintf( + cfg, sizeof(cfg), "compare=eq")); testutil_check(session->join(session, joincur, flagcur, cfg)); /* Expect no values returned */ diff --git a/test/csuite/wt2447_join_main_table/main.c b/test/csuite/wt2447_join_main_table/main.c index 1368e7c8c09..656cea04145 100644 --- a/test/csuite/wt2447_join_main_table/main.c +++ b/test/csuite/wt2447_join_main_table/main.c @@ -102,9 +102,12 @@ main(int argc, char *argv[]) tablename = strchr(opts->uri, ':'); testutil_assert(tablename != NULL); tablename++; - snprintf(index1uri, sizeof(index1uri), "index:%s:index1", tablename); - snprintf(index2uri, sizeof(index2uri), "index:%s:index2", tablename); - snprintf(joinuri, sizeof(joinuri), "join:%s", opts->uri); + testutil_check(__wt_snprintf( + index1uri, sizeof(index1uri), "index:%s:index1", tablename)); + testutil_check(__wt_snprintf( + index2uri, sizeof(index2uri), "index:%s:index2", tablename)); + testutil_check(__wt_snprintf( + joinuri, sizeof(joinuri), "join:%s", opts->uri)); testutil_check(wiredtiger_open(opts->home, NULL, "statistics=(all),create", &opts->conn)); @@ -150,7 +153,8 @@ main(int argc, char *argv[]) cursor2->set_key(cursor2, half + 1); testutil_check(cursor2->search(cursor2)); - sprintf(bloom_cfg, 
"compare=lt,strategy=bloom,count=%d", half); + testutil_check(__wt_snprintf(bloom_cfg, sizeof(bloom_cfg), + "compare=lt,strategy=bloom,count=%d", half)); testutil_check(session->open_cursor(session, joinuri, NULL, NULL, &jcursor)); diff --git a/test/csuite/wt2592_join_schema/main.c b/test/csuite/wt2592_join_schema/main.c index 0ec1c765d99..be3eff6136c 100644 --- a/test/csuite/wt2592_join_schema/main.c +++ b/test/csuite/wt2592_join_schema/main.c @@ -82,9 +82,12 @@ main(int argc, char *argv[]) tablename = strchr(opts->uri, ':'); testutil_assert(tablename != NULL); tablename++; - snprintf(countryuri, sizeof(countryuri), "index:%s:country", tablename); - snprintf(yearuri, sizeof(yearuri), "index:%s:year", tablename); - snprintf(joinuri, sizeof(joinuri), "join:%s", opts->uri); + testutil_check(__wt_snprintf( + countryuri, sizeof(countryuri), "index:%s:country", tablename)); + testutil_check(__wt_snprintf( + yearuri, sizeof(yearuri), "index:%s:year", tablename)); + testutil_check(__wt_snprintf( + joinuri, sizeof(joinuri), "join:%s", opts->uri)); testutil_check(wiredtiger_open(opts->home, NULL, "create,cache_size=200M", &opts->conn)); diff --git a/test/csuite/wt2834_join_bloom_fix/main.c b/test/csuite/wt2834_join_bloom_fix/main.c index f2c54b942be..e128df29f41 100644 --- a/test/csuite/wt2834_join_bloom_fix/main.c +++ b/test/csuite/wt2834_join_bloom_fix/main.c @@ -83,10 +83,14 @@ main(int argc, char *argv[]) tablename = strchr(opts->uri, ':'); testutil_assert(tablename != NULL); tablename++; - snprintf(posturi, sizeof(posturi), "index:%s:post", tablename); - snprintf(balanceuri, sizeof(balanceuri), "index:%s:balance", tablename); - snprintf(flaguri, sizeof(flaguri), "index:%s:flag", tablename); - snprintf(joinuri, sizeof(joinuri), "join:%s", opts->uri); + testutil_check(__wt_snprintf( + posturi, sizeof(posturi), "index:%s:post", tablename)); + testutil_check(__wt_snprintf( + balanceuri, sizeof(balanceuri), "index:%s:balance", tablename)); + testutil_check(__wt_snprintf( + 
flaguri, sizeof(flaguri), "index:%s:flag", tablename)); + testutil_check(__wt_snprintf( + joinuri, sizeof(joinuri), "join:%s", opts->uri)); testutil_check(session->create(session, posturi, "columns=(post)")); testutil_check(session->create(session, balanceuri, @@ -126,14 +130,14 @@ main(int argc, char *argv[]) balancecur->set_key(balancecur, 0); testutil_check(balancecur->search(balancecur)); - sprintf(cfg, "compare=lt,strategy=bloom,count=%d", - N_RECORDS / 100); + testutil_check(__wt_snprintf(cfg, sizeof(cfg), + "compare=lt,strategy=bloom,count=%d", N_RECORDS / 100)); testutil_check(session->join(session, joincur, balancecur, cfg)); flagcur->set_key(flagcur, 0); testutil_check(flagcur->search(flagcur)); - sprintf(cfg, "compare=eq,strategy=bloom,count=%d", - N_RECORDS / 100); + testutil_check(__wt_snprintf(cfg, sizeof(cfg), + "compare=eq,strategy=bloom,count=%d", N_RECORDS / 100)); testutil_check(session->join(session, joincur, flagcur, cfg)); /* Expect no values returned */ diff --git a/test/csuite/wt2853_perf/main.c b/test/csuite/wt2853_perf/main.c index b365b03493a..46ba71372e5 100644 --- a/test/csuite/wt2853_perf/main.c +++ b/test/csuite/wt2853_perf/main.c @@ -114,12 +114,15 @@ main(int argc, char *argv[]) tablename = strchr(opts->uri, ':'); testutil_assert(tablename != NULL); tablename++; - snprintf(sharedopts->posturi, sizeof(sharedopts->posturi), - "index:%s:post", tablename); - snprintf(sharedopts->baluri, sizeof(sharedopts->baluri), - "index:%s:bal", tablename); - snprintf(sharedopts->flaguri, sizeof(sharedopts->flaguri), - "index:%s:flag", tablename); + testutil_check(__wt_snprintf( + sharedopts->posturi, sizeof(sharedopts->posturi), + "index:%s:post", tablename)); + testutil_check(__wt_snprintf( + sharedopts->baluri, sizeof(sharedopts->baluri), + "index:%s:bal", tablename)); + testutil_check(__wt_snprintf( + sharedopts->flaguri, sizeof(sharedopts->flaguri), + "index:%s:flag", tablename)); testutil_check(session->create(session, sharedopts->posturi, 
"columns=(post)")); diff --git a/test/csuite/wt2909_checkpoint_integrity/main.c b/test/csuite/wt2909_checkpoint_integrity/main.c index 0ae81543050..ce7bd72fa3f 100644 --- a/test/csuite/wt2909_checkpoint_integrity/main.c +++ b/test/csuite/wt2909_checkpoint_integrity/main.c @@ -267,9 +267,11 @@ enable_failures(uint64_t allow_writes, uint64_t allow_reads) char value[100]; testutil_check(setenv("WT_FAIL_FS_ENABLE", "1", 1)); - snprintf(value, sizeof(value), "%" PRIu64, allow_writes); + testutil_check(__wt_snprintf( + value, sizeof(value), "%" PRIu64, allow_writes)); testutil_check(setenv("WT_FAIL_FS_WRITE_ALLOW", value, 1)); - snprintf(value, sizeof(value), "%" PRIu64, allow_reads); + testutil_check(__wt_snprintf( + value, sizeof(value), "%" PRIu64, allow_reads)); testutil_check(setenv("WT_FAIL_FS_READ_ALLOW", value, 1)); } @@ -325,10 +327,11 @@ run_check_subtest(TEST_OPTS *opts, const char *debugger, uint64_t nops, subtest_args[narg++] = (char *)"-v"; /* subtest is always verbose */ subtest_args[narg++] = (char *)"-p"; subtest_args[narg++] = (char *)"-o"; - snprintf(sarg, sizeof(sarg), "%" PRIu64, nops); + testutil_check(__wt_snprintf(sarg, sizeof(sarg), "%" PRIu64, nops)); subtest_args[narg++] = sarg; /* number of operations */ subtest_args[narg++] = (char *)"-n"; - snprintf(rarg, sizeof(rarg), "%" PRIu64, opts->nrecords); + testutil_check(__wt_snprintf( + rarg, sizeof(rarg), "%" PRIu64, opts->nrecords)); subtest_args[narg++] = rarg; /* number of records */ subtest_args[narg++] = NULL; testutil_assert(narg <= MAX_ARGS); @@ -463,15 +466,17 @@ subtest_main(int argc, char *argv[], bool close_test) testutil_make_work_dir(opts->home); /* Redirect stderr, stdout. 
*/ - sprintf(filename, "%s/%s", opts->home, STDERR_FILE); + testutil_check(__wt_snprintf( + filename, sizeof(filename), "%s/%s", opts->home, STDERR_FILE)); testutil_assert(freopen(filename, "a", stderr) != NULL); - sprintf(filename, "%s/%s", opts->home, STDOUT_FILE); + testutil_check(__wt_snprintf( + filename, sizeof(filename), "%s/%s", opts->home, STDOUT_FILE)); testutil_assert(freopen(filename, "a", stdout) != NULL); - snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "create,cache_size=250M,log=(enabled)," "transaction_sync=(enabled,method=none),extensions=(" WT_FAIL_FS_LIB - "=(early_load,config={environment=true,verbose=true})]"); + "=(early_load,config={environment=true,verbose=true})]")); testutil_check(wiredtiger_open(opts->home, NULL, config, &opts->conn)); testutil_check( diff --git a/test/csuite/wt3120_filesys/main.c b/test/csuite/wt3120_filesys/main.c index 09dce624066..2fae85017d4 100644 --- a/test/csuite/wt3120_filesys/main.c +++ b/test/csuite/wt3120_filesys/main.c @@ -52,8 +52,8 @@ main(int argc, char *argv[]) testutil_check(testutil_parse_opts(argc, argv, opts)); testutil_make_work_dir(opts->home); - snprintf(buf, sizeof(buf), - "create,extensions=(" WT_FAIL_FS_LIB "=(early_load=true))"); + testutil_check(__wt_snprintf(buf, sizeof(buf), + "create,extensions=(" WT_FAIL_FS_LIB "=(early_load=true))")); testutil_check(wiredtiger_open(opts->home, NULL, buf, &opts->conn)); testutil_check( opts->conn->open_session(opts->conn, NULL, NULL, &session)); diff --git a/test/cursor_order/cursor_order.c b/test/cursor_order/cursor_order.c index 62777f552bf..d3c64b54ab5 100644 --- a/test/cursor_order/cursor_order.c +++ b/test/cursor_order/cursor_order.c @@ -181,19 +181,15 @@ wt_connect(SHARED_CONFIG *cfg, char *config_open) }; int ret; char config[512]; - size_t print_count; testutil_clean_work_dir(home); testutil_make_work_dir(home); - print_count = (size_t)snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, 
sizeof(config), "create,statistics=(all),error_prefix=\"%s\",%s%s", progname, config_open == NULL ? "" : ",", - config_open == NULL ? "" : config_open); - - if (print_count >= sizeof(config)) - testutil_die(EINVAL, "Config string too long"); + config_open == NULL ? "" : config_open)); if ((ret = wiredtiger_open( home, &event_handler, config, &cfg->conn)) != 0) diff --git a/test/cursor_order/cursor_order_file.c b/test/cursor_order/cursor_order_file.c index 5dc7194b5fb..42d7af54de4 100644 --- a/test/cursor_order/cursor_order_file.c +++ b/test/cursor_order/cursor_order_file.c @@ -34,23 +34,21 @@ file_create(SHARED_CONFIG *cfg, const char *name) WT_CONNECTION *conn; WT_SESSION *session; int ret; - char *p, *end, config[128]; + char config[128]; conn = cfg->conn; if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) testutil_die(ret, "conn.session"); - p = config; - end = config + sizeof(config); - p += snprintf(p, (size_t)(end - p), + testutil_check(__wt_snprintf(config, sizeof(config), "key_format=%s," "internal_page_max=%d," "split_deepen_min_child=200," - "leaf_page_max=%d,", - cfg->ftype == ROW ? "S" : "r", 16 * 1024, 128 * 1024); - if (cfg->ftype == FIX) - (void)snprintf(p, (size_t)(end - p), ",value_format=3t"); + "leaf_page_max=%d," + "%s", + cfg->ftype == ROW ? "S" : "r", 16 * 1024, 128 * 1024, + cfg->ftype == FIX ? 
",value_format=3t" : "")); if ((ret = session->create(session, name, config)) != 0) if (ret != EEXIST) @@ -67,9 +65,10 @@ load(SHARED_CONFIG *cfg, const char *name) WT_CURSOR *cursor; WT_ITEM *value, _value; WT_SESSION *session; - char keybuf[64], valuebuf[64]; - int64_t keyno; + size_t len; + uint64_t keyno; int ret; + char keybuf[64], valuebuf[64]; conn = cfg->conn; @@ -83,9 +82,10 @@ load(SHARED_CONFIG *cfg, const char *name) testutil_die(ret, "cursor.open"); value = &_value; - for (keyno = 1; keyno <= (int64_t)cfg->nkeys; ++keyno) { + for (keyno = 1; keyno <= cfg->nkeys; ++keyno) { if (cfg->ftype == ROW) { - snprintf(keybuf, sizeof(keybuf), "%016u", (u_int)keyno); + testutil_check(__wt_snprintf( + keybuf, sizeof(keybuf), "%016" PRIu64, keyno)); cursor->set_key(cursor, keybuf); } else cursor->set_key(cursor, (uint32_t)keyno); @@ -93,8 +93,10 @@ load(SHARED_CONFIG *cfg, const char *name) if (cfg->ftype == FIX) cursor->set_value(cursor, 0x01); else { - value->size = (uint32_t)snprintf( - valuebuf, sizeof(valuebuf), "%37u", (u_int)keyno); + testutil_check(__wt_snprintf_len_set( + valuebuf, sizeof(valuebuf), + &len, "%37" PRIu64, keyno)); + value->size = (uint32_t)len; cursor->set_value(cursor, value); } if ((ret = cursor->insert(cursor)) != 0) diff --git a/test/cursor_order/cursor_order_ops.c b/test/cursor_order/cursor_order_ops.c index 58da49b2991..299f22684c9 100644 --- a/test/cursor_order/cursor_order_ops.c +++ b/test/cursor_order/cursor_order_ops.c @@ -69,7 +69,8 @@ ops_start(SHARED_CONFIG *cfg) run_info[i].cfg = cfg; if (i == 0 || cfg->multiple_files) { run_info[i].name = dmalloc(64); - snprintf(run_info[i].name, 64, FNAME, (int)i); + testutil_check(__wt_snprintf( + run_info[i].name, 64, FNAME, (int)i)); /* Vary by orders of magnitude */ if (cfg->vary_nops) @@ -93,8 +94,8 @@ ops_start(SHARED_CONFIG *cfg) run_info[offset].name = dmalloc(64); /* Have reverse scans read from tables with writes. 
*/ name_index = i % cfg->append_inserters; - snprintf( - run_info[offset].name, 64, FNAME, (int)name_index); + testutil_check(__wt_snprintf( + run_info[offset].name, 64, FNAME, (int)name_index)); /* Vary by orders of magnitude */ if (cfg->vary_nops) @@ -231,7 +232,7 @@ reverse_scan(void *arg) id = (uintmax_t)arg; s = &run_info[id]; cfg = s->cfg; - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); __wt_random_init(&s->rnd); printf(" reverse scan thread %2" PRIuMAX @@ -272,6 +273,7 @@ append_insert_op( { WT_ITEM *value, _value; uint64_t keyno; + size_t len; int ret; char keybuf[64], valuebuf[64]; @@ -281,7 +283,8 @@ append_insert_op( keyno = __wt_atomic_add64(&cfg->key_range, 1); if (cfg->ftype == ROW) { - snprintf(keybuf, sizeof(keybuf), "%016u", (u_int)keyno); + testutil_check(__wt_snprintf( + keybuf, sizeof(keybuf), "%016" PRIu64, keyno)); cursor->set_key(cursor, keybuf); } else cursor->set_key(cursor, (uint32_t)keyno); @@ -291,8 +294,9 @@ append_insert_op( if (cfg->ftype == FIX) cursor->set_value(cursor, 0x10); else { - value->size = (uint32_t)snprintf( - valuebuf, sizeof(valuebuf), "XXX %37u", (u_int)keyno); + testutil_check(__wt_snprintf_len_set( + valuebuf, sizeof(valuebuf), &len, "XXX %37" PRIu64, keyno)); + value->size = (uint32_t)len; cursor->set_value(cursor, value); } if ((ret = cursor->insert(cursor)) != 0) @@ -318,7 +322,7 @@ append_insert(void *arg) id = (uintmax_t)arg; s = &run_info[id]; cfg = s->cfg; - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); __wt_random_init(&s->rnd); printf("write thread %2" PRIuMAX " starting: tid: %s, file: %s\n", diff --git a/test/fops/file.c b/test/fops/file.c index 66c23dfed3c..d1cd22ab391 100644 --- a/test/fops/file.c +++ b/test/fops/file.c @@ -71,7 +71,8 @@ obj_bulk_unique(int force) /* Generate a unique object name. 
*/ if ((ret = pthread_rwlock_wrlock(&single)) != 0) testutil_die(ret, "pthread_rwlock_wrlock single"); - (void)snprintf(new_uri, sizeof(new_uri), "%s.%u", uri, ++uid); + testutil_check(__wt_snprintf( + new_uri, sizeof(new_uri), "%s.%u", uri, ++uid)); if ((ret = pthread_rwlock_unlock(&single)) != 0) testutil_die(ret, "pthread_rwlock_unlock single"); @@ -152,7 +153,8 @@ obj_create_unique(int force) /* Generate a unique object name. */ if ((ret = pthread_rwlock_wrlock(&single)) != 0) testutil_die(ret, "pthread_rwlock_wrlock single"); - (void)snprintf(new_uri, sizeof(new_uri), "%s.%u", uri, ++uid); + testutil_check(__wt_snprintf( + new_uri, sizeof(new_uri), "%s.%u", uri, ++uid)); if ((ret = pthread_rwlock_unlock(&single)) != 0) testutil_die(ret, "pthread_rwlock_unlock single"); diff --git a/test/fops/t.c b/test/fops/t.c index 469d5acd33a..07ac07349e3 100644 --- a/test/fops/t.c +++ b/test/fops/t.c @@ -157,11 +157,11 @@ wt_startup(char *config_open) testutil_make_work_dir(home); - snprintf(config_buf, sizeof(config_buf), + testutil_check(__wt_snprintf(config_buf, sizeof(config_buf), "create,error_prefix=\"%s\",cache_size=5MB%s%s", progname, config_open == NULL ? "" : ",", - config_open == NULL ? "" : config_open); + config_open == NULL ? 
"" : config_open)); if ((ret = wiredtiger_open( home, &event_handler, config_buf, &conn)) != 0) testutil_die(ret, "wiredtiger_open"); diff --git a/test/format/backup.c b/test/format/backup.c index 69fdf771de9..8aa614fa970 100644 --- a/test/format/backup.c +++ b/test/format/backup.c @@ -63,7 +63,7 @@ copy_file(WT_SESSION *session, const char *name) len = strlen("BACKUP") + strlen(name) + 10; first = dmalloc(len); - (void)snprintf(first, len, "BACKUP/%s", name); + testutil_check(__wt_snprintf(first, len, "BACKUP/%s", name)); testutil_check(__wt_copy_and_sync(session, name, first)); /* @@ -72,7 +72,7 @@ copy_file(WT_SESSION *session, const char *name) */ len = strlen("BACKUP_COPY") + strlen(name) + 10; second = dmalloc(len); - (void)snprintf(second, len, "BACKUP_COPY/%s", name); + testutil_check(__wt_snprintf(second, len, "BACKUP_COPY/%s", name)); testutil_check(__wt_copy_and_sync(session, first, second)); free(first); diff --git a/test/format/config.c b/test/format/config.c index 535dcd677e2..22b40f7164d 100644 --- a/test/format/config.c +++ b/test/format/config.c @@ -257,8 +257,8 @@ config_compression(const char *conf_name) */ cstr = "none"; if (strcmp(conf_name, "logging_compression") == 0 && g.c_logging == 0) { - (void)snprintf( - confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr); + testutil_check(__wt_snprintf( + confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr)); config_single(confbuf, 0); return; } @@ -302,7 +302,8 @@ config_compression(const char *conf_name) break; } - (void)snprintf(confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr); + testutil_check(__wt_snprintf( + confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr)); config_single(confbuf, 0); } @@ -678,7 +679,8 @@ void config_single(const char *s, int perm) { CONFIG *cp; - long v; + long vlong; + uint32_t v; char *p; const char *ep; @@ -743,21 +745,22 @@ config_single(const char *s, int perm) return; } - v = -1; + vlong = -1; if (F_ISSET(cp, C_BOOL)) { if (strncmp(ep, "off", strlen("off")) == 0) - v 
= 0; + vlong = 0; else if (strncmp(ep, "on", strlen("on")) == 0) - v = 1; + vlong = 1; } - if (v == -1) { - v = strtol(ep, &p, 10); + if (vlong == -1) { + vlong = strtol(ep, &p, 10); if (*p != '\0') { fprintf(stderr, "%s: %s: illegal numeric value\n", progname, s); exit(EXIT_FAILURE); } } + v = (uint32_t)vlong; if (F_ISSET(cp, C_BOOL)) { if (v != 0 && v != 1) { fprintf(stderr, "%s: %s: value of boolean not 0 or 1\n", @@ -770,7 +773,7 @@ config_single(const char *s, int perm) progname, s, cp->min, cp->maxset); exit(EXIT_FAILURE); } - *cp->v = (uint32_t)v; + *cp->v = v; } /* diff --git a/test/format/ops.c b/test/format/ops.c index 5309edf81c0..72e885bd0d6 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -530,8 +530,9 @@ ops(void *arg) pthread_rwlock_trywrlock(&g.backup_lock) == EBUSY) ckpt_config = NULL; else { - (void)snprintf(ckpt_name, sizeof(ckpt_name), - "name=thread-%d", tinfo->id); + testutil_check(__wt_snprintf( + ckpt_name, sizeof(ckpt_name), + "name=thread-%d", tinfo->id)); ckpt_config = ckpt_name; } @@ -557,8 +558,9 @@ ops(void *arg) strcpy(ckpt_name, "checkpoint=WiredTigerCheckpoint"); else - (void)snprintf(ckpt_name, sizeof(ckpt_name), - "checkpoint=thread-%d", tinfo->id); + testutil_check(__wt_snprintf( + ckpt_name, sizeof(ckpt_name), + "checkpoint=thread-%d", tinfo->id)); ckpt_available = true; skip_checkpoint: /* Pick the next checkpoint operation. */ diff --git a/test/format/rebalance.c b/test/format/rebalance.c index 9849b7df82b..e35c62e7255 100644 --- a/test/format/rebalance.c +++ b/test/format/rebalance.c @@ -41,10 +41,10 @@ wts_rebalance(void) track("rebalance", 0ULL, NULL); /* Dump the current object. */ - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), ".." DIR_DELIM_STR ".." DIR_DELIM_STR "wt" " -h %s dump -f %s/rebalance.orig %s", - g.home, g.home, g.uri); + g.home, g.home, g.uri)); testutil_checkfmt(system(cmd), "command failed: %s", cmd); /* Rebalance, then verify the object. 
*/ @@ -66,21 +66,21 @@ wts_rebalance(void) wts_verify("post-rebalance verify"); wts_close(); - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), ".." DIR_DELIM_STR ".." DIR_DELIM_STR "wt" " -h %s dump -f %s/rebalance.new %s", - g.home, g.home, g.uri); + g.home, g.home, g.uri)); testutil_checkfmt(system(cmd), "command failed: %s", cmd); /* Compare the old/new versions of the object. */ #ifdef _WIN32 - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), "fc /b %s\\rebalance.orig %s\\rebalance.new > NUL", - g.home, g.home); + g.home, g.home)); #else - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), "cmp %s/rebalance.orig %s/rebalance.new > /dev/null", - g.home, g.home); + g.home, g.home)); #endif testutil_checkfmt(system(cmd), "command failed: %s", cmd); } diff --git a/test/format/salvage.c b/test/format/salvage.c index 69805fb1018..f82dc34dd5f 100644 --- a/test/format/salvage.c +++ b/test/format/salvage.c @@ -70,29 +70,31 @@ corrupt(void) * It's a little tricky: if the data source is a file, we're looking * for "wt", if the data source is a table, we're looking for "wt.wt". 
*/ - (void)snprintf(buf, sizeof(buf), "%s/%s", g.home, WT_NAME); + testutil_check(__wt_snprintf( + buf, sizeof(buf), "%s/%s", g.home, WT_NAME)); if ((fd = open(buf, O_RDWR)) != -1) { #ifdef _WIN32 - (void)snprintf(copycmd, sizeof(copycmd), + testutil_check(__wt_snprintf(copycmd, sizeof(copycmd), "copy %s\\%s %s\\slvg.copy\\%s.corrupted", - g.home, WT_NAME, g.home, WT_NAME); + g.home, WT_NAME, g.home, WT_NAME)); #else - (void)snprintf(copycmd, sizeof(copycmd), + testutil_check(__wt_snprintf(copycmd, sizeof(copycmd), "cp %s/%s %s/slvg.copy/%s.corrupted", - g.home, WT_NAME, g.home, WT_NAME); + g.home, WT_NAME, g.home, WT_NAME)); #endif goto found; } - (void)snprintf(buf, sizeof(buf), "%s/%s.wt", g.home, WT_NAME); + testutil_check(__wt_snprintf( + buf, sizeof(buf), "%s/%s.wt", g.home, WT_NAME)); if ((fd = open(buf, O_RDWR)) != -1) { #ifdef _WIN32 - (void)snprintf(copycmd, sizeof(copycmd), + testutil_check(__wt_snprintf(copycmd, sizeof(copycmd), "copy %s\\%s.wt %s\\slvg.copy\\%s.wt.corrupted", - g.home, WT_NAME, g.home, WT_NAME); + g.home, WT_NAME, g.home, WT_NAME)); #else - (void)snprintf(copycmd, sizeof(copycmd), + testutil_check(__wt_snprintf(copycmd, sizeof(copycmd), "cp %s/%s.wt %s/slvg.copy/%s.wt.corrupted", - g.home, WT_NAME, g.home, WT_NAME); + g.home, WT_NAME, g.home, WT_NAME)); #endif goto found; } @@ -103,7 +105,8 @@ found: if (fstat(fd, &sb) == -1) offset = mmrand(NULL, 0, (u_int)sb.st_size); len = (size_t)(20 + (sb.st_size / 100) * 2); - (void)snprintf(buf, sizeof(buf), "%s/slvg.corrupt", g.home); + testutil_check(__wt_snprintf( + buf, sizeof(buf), "%s/slvg.corrupt", g.home)); if ((fp = fopen(buf, "w")) == NULL) testutil_die(errno, "salvage-corrupt: open: %s", buf); (void)fprintf(fp, diff --git a/test/format/util.c b/test/format/util.c index b9788f1ac75..983d03e2525 100644 --- a/test/format/util.c +++ b/test/format/util.c @@ -241,20 +241,23 @@ val_gen(WT_RAND_STATE *rnd, WT_ITEM *value, uint64_t keyno) void track(const char *tag, uint64_t cnt, TINFO *tinfo) 
{ - static int lastlen = 0; - int len; + static size_t lastlen = 0; + size_t len; char msg[128]; if (g.c_quiet || tag == NULL) return; if (tinfo == NULL && cnt == 0) - len = snprintf(msg, sizeof(msg), "%4d: %s", g.run_cnt, tag); + testutil_check(__wt_snprintf_len_set( + msg, sizeof(msg), &len, "%4d: %s", g.run_cnt, tag)); else if (tinfo == NULL) - len = snprintf( - msg, sizeof(msg), "%4d: %s: %" PRIu64, g.run_cnt, tag, cnt); + testutil_check(__wt_snprintf_len_set( + msg, sizeof(msg), &len, + "%4d: %s: %" PRIu64, g.run_cnt, tag, cnt)); else - len = snprintf(msg, sizeof(msg), + testutil_check(__wt_snprintf_len_set( + msg, sizeof(msg), &len, "%4d: %s: " "search %" PRIu64 "%s, " "insert %" PRIu64 "%s, " @@ -268,7 +271,7 @@ track(const char *tag, uint64_t cnt, TINFO *tinfo) tinfo->update > M(9) ? tinfo->update / M(1) : tinfo->update, tinfo->update > M(9) ? "M" : "", tinfo->remove > M(9) ? tinfo->remove / M(1) : tinfo->remove, - tinfo->remove > M(9) ? "M" : ""); + tinfo->remove > M(9) ? "M" : "")); if (lastlen > len) { memset(msg + len, ' ', (size_t)(lastlen - len)); @@ -297,27 +300,30 @@ path_setup(const char *home) /* Log file. */ len = strlen(g.home) + strlen("log") + 2; g.home_log = dmalloc(len); - snprintf(g.home_log, len, "%s/%s", g.home, "log"); + testutil_check(__wt_snprintf(g.home_log, len, "%s/%s", g.home, "log")); /* RNG log file. */ len = strlen(g.home) + strlen("rand") + 2; g.home_rand = dmalloc(len); - snprintf(g.home_rand, len, "%s/%s", g.home, "rand"); + testutil_check(__wt_snprintf( + g.home_rand, len, "%s/%s", g.home, "rand")); /* Run file. */ len = strlen(g.home) + strlen("CONFIG") + 2; g.home_config = dmalloc(len); - snprintf(g.home_config, len, "%s/%s", g.home, "CONFIG"); + testutil_check(__wt_snprintf( + g.home_config, len, "%s/%s", g.home, "CONFIG")); /* Statistics file. 
*/ len = strlen(g.home) + strlen("stats") + 2; g.home_stats = dmalloc(len); - snprintf(g.home_stats, len, "%s/%s", g.home, "stats"); + testutil_check(__wt_snprintf( + g.home_stats, len, "%s/%s", g.home, "stats")); /* BDB directory. */ len = strlen(g.home) + strlen("bdb") + 2; g.home_bdb = dmalloc(len); - snprintf(g.home_bdb, len, "%s/%s", g.home, "bdb"); + testutil_check(__wt_snprintf(g.home_bdb, len, "%s/%s", g.home, "bdb")); /* * Home directory initialize command: create the directory if it doesn't @@ -336,21 +342,23 @@ path_setup(const char *home) "cd %s & mkdir KVS" len = strlen(g.home) * 7 + strlen(CMD) + 1; g.home_init = dmalloc(len); - snprintf(g.home_init, len, CMD, - g.home, g.home, g.home, g.home, g.home, g.home, g.home); + testutil_check(__wt_snprintf(g.home_init, len, CMD, + g.home, g.home, g.home, g.home, g.home, g.home, g.home)); #else #define CMD "test -e %s || mkdir %s; " \ "cd %s > /dev/null && rm -rf `ls | sed /rand/d`; " \ "mkdir KVS" len = strlen(g.home) * 3 + strlen(CMD) + 1; g.home_init = dmalloc(len); - snprintf(g.home_init, len, CMD, g.home, g.home, g.home); + testutil_check(__wt_snprintf( + g.home_init, len, CMD, g.home, g.home, g.home)); #endif /* Primary backup directory. 
*/ len = strlen(g.home) + strlen("BACKUP") + 2; g.home_backup = dmalloc(len); - snprintf(g.home_backup, len, "%s/%s", g.home, "BACKUP"); + testutil_check(__wt_snprintf( + g.home_backup, len, "%s/%s", g.home, "BACKUP")); /* * Backup directory initialize command, remove and re-create the primary @@ -365,9 +373,9 @@ path_setup(const char *home) len = strlen(g.home) * 4 + strlen("BACKUP") * 2 + strlen("BACKUP_COPY") * 2 + strlen(CMD) + 1; g.home_backup_init = dmalloc(len); - snprintf(g.home_backup_init, len, CMD, + testutil_check(__wt_snprintf(g.home_backup_init, len, CMD, g.home, "BACKUP", g.home, "BACKUP_COPY", - g.home, "BACKUP", g.home, "BACKUP_COPY"); + g.home, "BACKUP", g.home, "BACKUP_COPY")); /* * Salvage command, save the interesting files so we can replay the @@ -390,7 +398,7 @@ path_setup(const char *home) #endif len = strlen(g.home) + strlen(CMD) + 1; g.home_salvage_copy = dmalloc(len); - snprintf(g.home_salvage_copy, len, CMD, g.home); + testutil_check(__wt_snprintf(g.home_salvage_copy, len, CMD, g.home)); } /* @@ -489,8 +497,9 @@ alter(void *arg) while (!g.workers_finished) { period = mmrand(NULL, 1, 10); - snprintf(buf, sizeof(buf), - "access_pattern_hint=%s", access_value ? "random" : "none"); + testutil_check(__wt_snprintf(buf, sizeof(buf), + "access_pattern_hint=%s", + access_value ? "random" : "none")); access_value = !access_value; if (session->alter(session, g.uri, buf) != 0) break; diff --git a/test/format/wts.c b/test/format/wts.c index a87aa5b9f88..6aa4784d1c1 100644 --- a/test/format/wts.c +++ b/test/format/wts.c @@ -120,8 +120,15 @@ static WT_EVENT_HANDLER event_handler = { NULL /* Close handler. */ }; -#undef REMAIN -#define REMAIN(p, end) (size_t)((p) >= (end) ? 0 : (end) - (p)) +#define CONFIG_APPEND(p, ...) 
do { \ + size_t __len; \ + testutil_check( \ + __wt_snprintf_len_set(p, max, &__len, __VA_ARGS__)); \ + if (__len > max) \ + __len = max; \ + p += __len; \ + max -= __len; \ +} while (0) /* * wts_open -- @@ -132,14 +139,15 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) { WT_CONNECTION *conn; WT_DECL_RET; - char *config, *end, *p, helium_config[1024]; + size_t max; + char *config, *p, helium_config[1024]; *connp = NULL; config = p = g.wiredtiger_open_config; - end = config + sizeof(g.wiredtiger_open_config); + max = sizeof(g.wiredtiger_open_config); - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, "create=true," "cache_size=%" PRIu32 "MB," "checkpoint_sync=false," @@ -148,26 +156,25 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) /* In-memory configuration. */ if (g.c_in_memory != 0) - p += snprintf(p, REMAIN(p, end), ",in_memory=1"); + CONFIG_APPEND(p, ",in_memory=1"); /* LSM configuration. */ if (DATASOURCE("lsm")) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",lsm_manager=(worker_thread_max=%" PRIu32 "),", g.c_lsm_worker_threads); - if (DATASOURCE("lsm") || g.c_cache < 20) { - p += snprintf(p, REMAIN(p, end), ",eviction_dirty_trigger=95"); - } + if (DATASOURCE("lsm") || g.c_cache < 20) + CONFIG_APPEND(p, ",eviction_dirty_trigger=95"); /* Eviction worker configuration. */ if (g.c_evict_max != 0) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",eviction=(threads_max=%" PRIu32 ")", g.c_evict_max); /* Logging configuration. */ if (g.c_logging) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",log=(enabled=true,archive=%d,prealloc=%d" ",compressor=\"%s\")", g.c_logging_archive ? 1 : 0, @@ -175,21 +182,21 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) compressor(g.c_logging_compression_flag)); if (g.c_encryption) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",encryption=(name=%s)", encryptor(g.c_encryption_flag)); /* Miscellaneous. 
*/ #ifdef HAVE_POSIX_MEMALIGN - p += snprintf(p, REMAIN(p, end), ",buffer_alignment=512"); + CONFIG_APPEND(p, ",buffer_alignment=512"); #endif - p += snprintf(p, REMAIN(p, end), ",mmap=%d", g.c_mmap ? 1 : 0); + CONFIG_APPEND(p, ",mmap=%d", g.c_mmap ? 1 : 0); if (g.c_direct_io) - p += snprintf(p, REMAIN(p, end), ",direct_io=(data)"); + CONFIG_APPEND(p, ",direct_io=(data)"); if (g.c_data_extend) - p += snprintf(p, REMAIN(p, end), ",file_extend=(data=8MB)"); + CONFIG_APPEND(p, ",file_extend=(data=8MB)"); /* * Run the statistics server and/or maintain statistics in the engine. @@ -198,18 +205,18 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) if (g.c_statistics_server) { if (mmrand(NULL, 0, 5) == 1 && memcmp(g.uri, "file:", strlen("file:")) == 0) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",statistics=(fast)" ",statistics_log=(wait=5,sources=(\"file:\"))"); else - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",statistics=(fast),statistics_log=(wait=5)"); } else - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",statistics=(%s)", g.c_statistics ? "fast" : "none"); /* Extensions. */ - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",extensions=[" "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],", g.c_reverse ? REVERSE_PATH : "", @@ -227,11 +234,11 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) * override the standard configuration. 
*/ if (g.c_config_open != NULL) - p += snprintf(p, REMAIN(p, end), ",%s", g.c_config_open); + CONFIG_APPEND(p, ",%s", g.c_config_open); if (g.config_open != NULL) - p += snprintf(p, REMAIN(p, end), ",%s", g.config_open); + CONFIG_APPEND(p, ",%s", g.config_open); - if (REMAIN(p, end) == 0) + if (max == 0) testutil_die(ENOMEM, "wiredtiger_open configuration buffer too small"); @@ -259,12 +266,13 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) if (DATASOURCE("helium")) { if (g.helium_mount == NULL) testutil_die(EINVAL, "no Helium mount point specified"); - (void)snprintf(helium_config, sizeof(helium_config), + testutil_check( + __wt_snprintf(helium_config, sizeof(helium_config), "entry=wiredtiger_extension_init,config=[" "helium_verbose=0," "dev1=[helium_devices=\"he://./%s\"," "helium_o_volume_truncate=1]]", - g.helium_mount); + g.helium_mount)); if ((ret = conn->load_extension( conn, HELIUM_PATH, helium_config)) != 0) testutil_die(ret, @@ -299,13 +307,13 @@ wts_init(void) { WT_CONNECTION *conn; WT_SESSION *session; + size_t max; uint32_t maxintlpage, maxintlkey, maxleafpage, maxleafkey, maxleafvalue; - char config[4096], *end, *p; + char config[4096], *p; conn = g.wts_conn; - p = config; - end = config + sizeof(config); + max = sizeof(config); /* * Ensure that we can service at least one operation per-thread @@ -326,7 +334,7 @@ wts_init(void) if (maxleafpage > 512) maxleafpage >>= 1; } - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, "key_format=%s," "allocation_size=512,%s" "internal_page_max=%" PRIu32 ",leaf_page_max=%" PRIu32, @@ -340,43 +348,35 @@ wts_init(void) */ maxintlkey = mmrand(NULL, maxintlpage / 50, maxintlpage / 40); if (maxintlkey > 20) - p += snprintf(p, REMAIN(p, end), - ",internal_key_max=%" PRIu32, maxintlkey); + CONFIG_APPEND(p, ",internal_key_max=%" PRIu32, maxintlkey); maxleafkey = mmrand(NULL, maxleafpage / 50, maxleafpage / 40); if (maxleafkey > 20) - p += snprintf(p, REMAIN(p, end), - ",leaf_key_max=%" PRIu32, 
maxleafkey); + CONFIG_APPEND(p, ",leaf_key_max=%" PRIu32, maxleafkey); maxleafvalue = mmrand(NULL, maxleafpage * 10, maxleafpage / 40); if (maxleafvalue > 40 && maxleafvalue < 100 * 1024) - p += snprintf(p, REMAIN(p, end), - ",leaf_value_max=%" PRIu32, maxleafvalue); + CONFIG_APPEND(p, ",leaf_value_max=%" PRIu32, maxleafvalue); switch (g.type) { case FIX: - p += snprintf(p, REMAIN(p, end), - ",value_format=%" PRIu32 "t", g.c_bitcnt); + CONFIG_APPEND(p, ",value_format=%" PRIu32 "t", g.c_bitcnt); break; case ROW: if (g.c_huffman_key) - p += snprintf(p, REMAIN(p, end), - ",huffman_key=english"); + CONFIG_APPEND(p, ",huffman_key=english"); if (g.c_prefix_compression) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",prefix_compression_min=%" PRIu32, g.c_prefix_compression_min); else - p += snprintf(p, REMAIN(p, end), - ",prefix_compression=false"); + CONFIG_APPEND(p, ",prefix_compression=false"); if (g.c_reverse) - p += snprintf(p, REMAIN(p, end), - ",collator=reverse"); + CONFIG_APPEND(p, ",collator=reverse"); /* FALLTHROUGH */ case VAR: if (g.c_huffman_value) - p += snprintf(p, REMAIN(p, end), - ",huffman_value=english"); + CONFIG_APPEND(p, ",huffman_value=english"); if (g.c_dictionary) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",dictionary=%" PRIu32, mmrand(NULL, 123, 517)); break; } @@ -384,66 +384,63 @@ wts_init(void) /* Configure checksums. */ switch (g.c_checksum_flag) { case CHECKSUM_OFF: - p += snprintf(p, REMAIN(p, end), ",checksum=\"off\""); + CONFIG_APPEND(p, ",checksum=\"off\""); break; case CHECKSUM_ON: - p += snprintf(p, REMAIN(p, end), ",checksum=\"on\""); + CONFIG_APPEND(p, ",checksum=\"on\""); break; case CHECKSUM_UNCOMPRESSED: - p += snprintf(p, REMAIN(p, end), ",checksum=\"uncompressed\""); + CONFIG_APPEND(p, ",checksum=\"uncompressed\""); break; } /* Configure compression. 
*/ if (g.c_compression_flag != COMPRESS_NONE) - p += snprintf(p, REMAIN(p, end), ",block_compressor=\"%s\"", + CONFIG_APPEND(p, ",block_compressor=\"%s\"", compressor(g.c_compression_flag)); /* Configure Btree internal key truncation. */ - p += snprintf(p, REMAIN(p, end), ",internal_key_truncate=%s", + CONFIG_APPEND(p, ",internal_key_truncate=%s", g.c_internal_key_truncation ? "true" : "false"); /* Configure Btree page key gap. */ - p += snprintf(p, REMAIN(p, end), ",key_gap=%" PRIu32, g.c_key_gap); + CONFIG_APPEND(p, ",key_gap=%" PRIu32, g.c_key_gap); /* Configure Btree split page percentage. */ - p += snprintf(p, REMAIN(p, end), ",split_pct=%" PRIu32, g.c_split_pct); + CONFIG_APPEND(p, ",split_pct=%" PRIu32, g.c_split_pct); /* Configure LSM and data-sources. */ if (DATASOURCE("helium")) - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",type=helium,helium_o_compress=%d,helium_o_truncate=1", g.c_compression_flag == COMPRESS_NONE ? 0 : 1); if (DATASOURCE("kvsbdb")) - p += snprintf(p, REMAIN(p, end), ",type=kvsbdb"); + CONFIG_APPEND(p, ",type=kvsbdb"); if (DATASOURCE("lsm")) { - p += snprintf(p, REMAIN(p, end), ",type=lsm,lsm=("); - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, ",type=lsm,lsm=("); + CONFIG_APPEND(p, "auto_throttle=%s,", g.c_auto_throttle ? "true" : "false"); - p += snprintf(p, REMAIN(p, end), - "chunk_size=%" PRIu32 "MB,", g.c_chunk_size); + CONFIG_APPEND(p, "chunk_size=%" PRIu32 "MB,", g.c_chunk_size); /* * We can't set bloom_oldest without bloom, and we want to test * with Bloom filters on most of the time anyway. */ if (g.c_bloom_oldest) g.c_bloom = 1; - p += snprintf(p, REMAIN(p, end), - "bloom=%s,", g.c_bloom ? "true" : "false"); - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, "bloom=%s,", g.c_bloom ? 
"true" : "false"); + CONFIG_APPEND(p, "bloom_bit_count=%" PRIu32 ",", g.c_bloom_bit_count); - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, "bloom_hash_count=%" PRIu32 ",", g.c_bloom_hash_count); - p += snprintf(p, REMAIN(p, end), + CONFIG_APPEND(p, "bloom_oldest=%s,", g.c_bloom_oldest ? "true" : "false"); - p += snprintf(p, REMAIN(p, end), - "merge_max=%" PRIu32 ",", g.c_merge_max); - p += snprintf(p, REMAIN(p, end), ",)"); + CONFIG_APPEND(p, "merge_max=%" PRIu32 ",", g.c_merge_max); + CONFIG_APPEND(p, ",)"); } - if (REMAIN(p, end) == 0) + if (max == 0) testutil_die(ENOMEM, "WT_SESSION.create configuration buffer too small"); @@ -490,14 +487,14 @@ wts_dump(const char *tag, int dump_bdb) len = strlen(g.home) + strlen(BERKELEY_DB_PATH) + strlen(g.uri) + 100; cmd = dmalloc(len); - (void)snprintf(cmd, len, + testutil_check(__wt_snprintf(cmd, len, "sh s_dumpcmp -h %s %s %s %s %s %s", g.home, dump_bdb ? "-b " : "", dump_bdb ? BERKELEY_DB_PATH : "", g.type == FIX || g.type == VAR ? "-c" : "", g.uri == NULL ? "" : "-n", - g.uri == NULL ? "" : g.uri); + g.uri == NULL ? 
"" : g.uri)); testutil_checkfmt(system(cmd), "%s: dump comparison failed", tag); free(cmd); @@ -587,7 +584,7 @@ wts_stats(void) fprintf(fp, "\n\n====== Data source statistics:\n"); len = strlen("statistics:") + strlen(g.uri) + 1; stat_name = dmalloc(len); - snprintf(stat_name, len, "statistics:%s", g.uri); + testutil_check(__wt_snprintf(stat_name, len, "statistics:%s", g.uri)); testutil_check(session->open_cursor( session, stat_name, NULL, NULL, &cursor)); free(stat_name); diff --git a/test/manydbs/manydbs.c b/test/manydbs/manydbs.c index 345c470ba90..42020d6ce9a 100644 --- a/test/manydbs/manydbs.c +++ b/test/manydbs/manydbs.c @@ -168,7 +168,8 @@ main(int argc, char *argv[]) testutil_make_work_dir(home); __wt_random_init(&rnd); for (i = 0; i < dbs; ++i) { - snprintf(hometmp, HOME_SIZE, "%s/%s.%d", home, HOME_BASE, i); + testutil_check(__wt_snprintf( + hometmp, HOME_SIZE, "%s/%s.%d", home, HOME_BASE, i)); testutil_make_work_dir(hometmp); /* * Open each database. Rotate different configurations diff --git a/test/readonly/readonly.c b/test/readonly/readonly.c index 746aecbf6c5..66c7a0ca692 100644 --- a/test/readonly/readonly.c +++ b/test/readonly/readonly.c @@ -206,10 +206,12 @@ main(int argc, char *argv[]) * Set up all the directory names. */ testutil_work_dir_from_path(home, sizeof(home), working_dir); - (void)snprintf(home_wr, sizeof(home_wr), "%s%s", home, HOME_WR_SUFFIX); - (void)snprintf(home_rd, sizeof(home_rd), "%s%s", home, HOME_RD_SUFFIX); - (void)snprintf( - home_rd2, sizeof(home_rd2), "%s%s", home, HOME_RD2_SUFFIX); + testutil_check(__wt_snprintf( + home_wr, sizeof(home_wr), "%s%s", home, HOME_WR_SUFFIX)); + testutil_check(__wt_snprintf( + home_rd, sizeof(home_rd), "%s%s", home, HOME_RD_SUFFIX)); + testutil_check(__wt_snprintf( + home_rd2, sizeof(home_rd2), "%s%s", home, HOME_RD2_SUFFIX)); if (!child) { testutil_make_work_dir(home); testutil_make_work_dir(home_wr); @@ -268,22 +270,22 @@ main(int argc, char *argv[]) * Copy the database. 
Remove any lock file from one copy * and chmod the copies to be read-only permissions. */ - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), "cp -rp %s/* %s; rm -f %s/WiredTiger.lock", - home, home_wr, home_wr); + home, home_wr, home_wr)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), "cp -rp %s/* %s; chmod 0555 %s; chmod -R 0444 %s/*", - home, home_rd, home_rd, home_rd); + home, home_rd, home_rd, home_rd)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); - (void)snprintf(cmd, sizeof(cmd), + testutil_check(__wt_snprintf(cmd, sizeof(cmd), "cp -rp %s/* %s; rm -f %s/WiredTiger.lock; " "chmod 0555 %s; chmod -R 0444 %s/*", - home, home_rd2, home_rd2, home_rd2, home_rd2); + home, home_rd2, home_rd2, home_rd2, home_rd2)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); @@ -327,8 +329,8 @@ main(int argc, char *argv[]) * * The child will exit with success if its test passes. */ - (void)snprintf( - cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); if (WEXITSTATUS(status) != 0) @@ -337,8 +339,8 @@ main(int argc, char *argv[]) /* * Scenario 2. Run child with writable config. */ - (void)snprintf( - cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); if (WEXITSTATUS(status) != 0) @@ -358,8 +360,8 @@ main(int argc, char *argv[]) /* * Scenario 3. Child read-only. 
*/ - (void)snprintf( - cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "%s -h %s -R", saved_argv0, working_dir)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); if (WEXITSTATUS(status) != 0) @@ -368,8 +370,8 @@ main(int argc, char *argv[]) /* * Scenario 4. Run child with writable config. */ - (void)snprintf( - cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "%s -h %s -W", saved_argv0, working_dir)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); if (WEXITSTATUS(status) != 0) @@ -390,11 +392,12 @@ main(int argc, char *argv[]) * We need to chmod the read-only databases back so that they can * be removed by scripts. */ - (void)snprintf(cmd, sizeof(cmd), "chmod 0777 %s %s", home_rd, home_rd2); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "chmod 0777 %s %s", home_rd, home_rd2)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); - (void)snprintf(cmd, sizeof(cmd), "chmod -R 0666 %s/* %s/*", - home_rd, home_rd2); + testutil_check(__wt_snprintf( + cmd, sizeof(cmd), "chmod -R 0666 %s/* %s/*", home_rd, home_rd2)); if ((status = system(cmd)) < 0) testutil_die(status, "system: %s", cmd); printf(" *** Readonly test successful ***\n"); diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c index 1d6599ce1b3..febe6530534 100644 --- a/test/recovery/random-abort.c +++ b/test/recovery/random-abort.c @@ -94,14 +94,16 @@ thread_run(void *arg) /* * The value is the name of the record file with our id appended. */ - snprintf(buf, sizeof(buf), RECORDS_FILE, td->id); + testutil_check(__wt_snprintf(buf, sizeof(buf), RECORDS_FILE, td->id)); /* * Set up a large value putting our id in it. Write it in there a * bunch of times, but the rest of the buffer can just be zero. 
*/ - snprintf(lgbuf, sizeof(lgbuf), "th-%" PRIu32, td->id); + testutil_check(__wt_snprintf( + lgbuf, sizeof(lgbuf), "th-%" PRIu32, td->id)); for (i = 0; i < 128; i += strlen(lgbuf)) - snprintf(&large[i], lsize - i, "%s", lgbuf); + testutil_check(__wt_snprintf( + &large[i], lsize - i, "%s", lgbuf)); /* * Keep a separate file with the records we wrote for checking. */ @@ -124,7 +126,8 @@ thread_run(void *arg) * Write our portion of the key space until we're killed. */ for (i = td->start; ; ++i) { - snprintf(kname, sizeof(kname), "%" PRIu64, i); + testutil_check(__wt_snprintf( + kname, sizeof(kname), "%" PRIu64, i)); cursor->set_key(cursor, kname); /* * Every 30th record write a very large record that exceeds the @@ -313,7 +316,8 @@ main(int argc, char *argv[]) * still exists in case the child aborts for some reason we * don't stay in this loop forever. */ - snprintf(statname, sizeof(statname), "%s/%s", home, fs_main); + testutil_check(__wt_snprintf( + statname, sizeof(statname), "%s/%s", home, fs_main)); while (stat(statname, &sb) != 0 && kill(pid, 0) == 0) sleep(1); sleep(timeout); @@ -348,7 +352,8 @@ main(int argc, char *argv[]) fatal = false; for (i = 0; i < nth; ++i) { middle = 0; - snprintf(fname, sizeof(fname), RECORDS_FILE, i); + testutil_check(__wt_snprintf( + fname, sizeof(fname), RECORDS_FILE, i)); if ((fp = fopen(fname, "r")) == NULL) testutil_die(errno, "fopen: %s", fname); @@ -376,7 +381,8 @@ main(int argc, char *argv[]) fname, key, last_key); break; } - snprintf(kname, sizeof(kname), "%" PRIu64, key); + testutil_check(__wt_snprintf( + kname, sizeof(kname), "%" PRIu64, key)); cursor->set_key(cursor, kname); if ((ret = cursor->search(cursor)) != 0) { if (ret != WT_NOTFOUND) diff --git a/test/recovery/truncated-log.c b/test/recovery/truncated-log.c index 1f0a0f7a7bd..a127d8c1c63 100644 --- a/test/recovery/truncated-log.c +++ b/test/recovery/truncated-log.c @@ -30,11 +30,6 @@ #include -#ifdef _WIN32 -/* snprintf is not supported on <= VS2013 */ -#define 
snprintf _snprintf -#endif - static char home[1024]; /* Program working dir */ static const char * const uri = "table:main"; @@ -137,7 +132,8 @@ usage(void) * Child process creates the database and table, and then writes data into * the table until it is killed by the parent. */ -static void fill_db(void)WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); +static void fill_db(void) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); static void fill_db(void) { @@ -193,9 +189,9 @@ fill_db(void) max_key = min_key * 2; first = true; for (i = 0; i < max_key; ++i) { - snprintf(k, sizeof(k), "key%03d", (int)i); - snprintf(v, sizeof(v), "value%0*d", - (int)(V_SIZE - strlen("value")), (int)i); + testutil_check(__wt_snprintf(k, sizeof(k), "key%03d", (int)i)); + testutil_check(__wt_snprintf(v, sizeof(v), "value%0*d", + (int)(V_SIZE - (strlen("value") + 1)), (int)i)); cursor->set_key(cursor, k); cursor->set_value(cursor, v); if ((ret = cursor->insert(cursor)) != 0) diff --git a/test/salvage/salvage.c b/test/salvage/salvage.c index 942f7faba03..83f9c6349bc 100644 --- a/test/salvage/salvage.c +++ b/test/salvage/salvage.c @@ -440,7 +440,8 @@ run(int r) process(); - snprintf(buf, sizeof(buf), "cmp %s %s > /dev/null", DUMP, RSLT); + testutil_check(__wt_snprintf( + buf, sizeof(buf), "cmp %s %s > /dev/null", DUMP, RSLT)); if (system(buf)) { fprintf(stderr, "check failed, salvage results were incorrect\n"); @@ -485,28 +486,28 @@ build(int ikey, int ivalue, int cnt) switch (page_type) { case WT_PAGE_COL_FIX: - (void)snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "key_format=r,value_format=7t," "allocation_size=%d," "internal_page_max=%d,internal_item_max=%d," "leaf_page_max=%d,leaf_item_max=%d", - PSIZE, PSIZE, OSIZE, PSIZE, OSIZE); + PSIZE, PSIZE, OSIZE, PSIZE, OSIZE)); break; case WT_PAGE_COL_VAR: - (void)snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "key_format=r," "allocation_size=%d," 
"internal_page_max=%d,internal_item_max=%d," "leaf_page_max=%d,leaf_item_max=%d", - PSIZE, PSIZE, OSIZE, PSIZE, OSIZE); + PSIZE, PSIZE, OSIZE, PSIZE, OSIZE)); break; case WT_PAGE_ROW_LEAF: - (void)snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "key_format=u," "allocation_size=%d," "internal_page_max=%d,internal_item_max=%d," "leaf_page_max=%d,leaf_item_max=%d", - PSIZE, PSIZE, OSIZE, PSIZE, OSIZE); + PSIZE, PSIZE, OSIZE, PSIZE, OSIZE)); break; default: assert(0); @@ -520,7 +521,8 @@ build(int ikey, int ivalue, int cnt) case WT_PAGE_COL_VAR: break; case WT_PAGE_ROW_LEAF: - snprintf(kbuf, sizeof(kbuf), "%010d KEY------", ikey); + testutil_check(__wt_snprintf( + kbuf, sizeof(kbuf), "%010d KEY------", ikey)); key.data = kbuf; key.size = 20; cursor->set_key(cursor, &key); @@ -533,8 +535,8 @@ build(int ikey, int ivalue, int cnt) break; case WT_PAGE_COL_VAR: case WT_PAGE_ROW_LEAF: - snprintf(vbuf, sizeof(vbuf), - "%010d VALUE----", value_unique ? ivalue : 37); + testutil_check(__wt_snprintf(vbuf, sizeof(vbuf), + "%010d VALUE----", value_unique ? ivalue : 37)); value.data = vbuf; value.size = 20; cursor->set_value(cursor, &value); @@ -621,9 +623,9 @@ process(void) /* Salvage. 
*/ config[0] = '\0'; if (verbose) - snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "error_prefix=\"%s\",verbose=[salvage,verify],", - progname); + progname)); strcat(config, "log=(enabled=false),"); CHECK(wiredtiger_open(NULL, NULL, config, &conn) == 0); diff --git a/test/thread/file.c b/test/thread/file.c index 81ec6ad44f8..7a7d16c4cd6 100644 --- a/test/thread/file.c +++ b/test/thread/file.c @@ -33,20 +33,18 @@ file_create(const char *name) { WT_SESSION *session; int ret; - char *p, *end, config[128]; + char config[128]; if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) testutil_die(ret, "conn.session"); - p = config; - end = config + sizeof(config); - p += snprintf(p, (size_t)(end - p), + testutil_check(__wt_snprintf(config, sizeof(config), "key_format=%s," "internal_page_max=%d," - "leaf_page_max=%d,", - ftype == ROW ? "u" : "r", 16 * 1024, 128 * 1024); - if (ftype == FIX) - (void)snprintf(p, (size_t)(end - p), ",value_format=3t"); + "leaf_page_max=%d," + "%s", + ftype == ROW ? "u" : "r", 16 * 1024, 128 * 1024, + ftype == FIX ? 
",value_format=3t" : "")); if ((ret = session->create(session, name, config)) != 0) if (ret != EEXIST) @@ -62,9 +60,10 @@ load(const char *name) WT_CURSOR *cursor; WT_ITEM *key, _key, *value, _value; WT_SESSION *session; - char keybuf[64], valuebuf[64]; - u_int keyno; + uint64_t keyno; + size_t len; int ret; + char keybuf[64], valuebuf[64]; file_create(name); @@ -79,18 +78,22 @@ load(const char *name) value = &_value; for (keyno = 1; keyno <= nkeys; ++keyno) { if (ftype == ROW) { + testutil_check(__wt_snprintf_len_set( + keybuf, sizeof(keybuf), + &len, "%017" PRIu64, keyno)); key->data = keybuf; - key->size = (uint32_t) - snprintf(keybuf, sizeof(keybuf), "%017u", keyno); + key->size = (uint32_t)len; cursor->set_key(cursor, key); } else - cursor->set_key(cursor, (uint32_t)keyno); - value->data = valuebuf; + cursor->set_key(cursor, keyno); if (ftype == FIX) cursor->set_value(cursor, 0x01); else { - value->size = (uint32_t) - snprintf(valuebuf, sizeof(valuebuf), "%37u", keyno); + testutil_check(__wt_snprintf_len_set( + valuebuf, sizeof(valuebuf), + &len, "%37" PRIu64, keyno)); + value->data = valuebuf; + value->size = (uint32_t)len; cursor->set_value(cursor, value); } if ((ret = cursor->insert(cursor)) != 0) diff --git a/test/thread/rw.c b/test/thread/rw.c index c6107a06c49..e8a2650ca51 100644 --- a/test/thread/rw.c +++ b/test/thread/rw.c @@ -66,7 +66,8 @@ rw_start(u_int readers, u_int writers) for (i = 0; i < writers; ++i) { if (i == 0 || multiple_files) { run_info[i].name = dmalloc(64); - snprintf(run_info[i].name, 64, FNAME, i); + testutil_check(__wt_snprintf( + run_info[i].name, 64, FNAME, i)); /* Vary by orders of magnitude */ if (vary_nops) @@ -88,8 +89,8 @@ rw_start(u_int readers, u_int writers) run_info[offset].name = dmalloc(64); /* Have readers read from tables with writes. 
*/ name_index = i % writers; - snprintf( - run_info[offset].name, 64, FNAME, name_index); + testutil_check(__wt_snprintf( + run_info[offset].name, 64, FNAME, name_index)); /* Vary by orders of magnitude */ if (vary_nops) @@ -158,7 +159,8 @@ static inline void reader_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) { WT_ITEM *key, _key; - u_int keyno; + size_t len; + uint64_t keyno; int ret; char keybuf[64]; @@ -166,17 +168,18 @@ reader_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) keyno = __wt_random(&s->rnd) % nkeys + 1; if (ftype == ROW) { + testutil_check(__wt_snprintf_len_set( + keybuf, sizeof(keybuf), &len, "%017" PRIu64, keyno)); key->data = keybuf; - key->size = (uint32_t) - snprintf(keybuf, sizeof(keybuf), "%017u", keyno); + key->size = (uint32_t)len; cursor->set_key(cursor, key); } else - cursor->set_key(cursor, (uint32_t)keyno); + cursor->set_key(cursor, keyno); if ((ret = cursor->search(cursor)) != 0 && ret != WT_NOTFOUND) testutil_die(ret, "cursor.search"); if (log_print) testutil_check(session->log_printf(session, - "Reader Thread %p key %017u", pthread_self(), keyno)); + "Reader Thread %p key %017" PRIu64, pthread_self(), keyno)); } /* @@ -195,7 +198,7 @@ reader(void *arg) id = (int)(uintptr_t)arg; s = &run_info[id]; - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); __wt_random_init(&s->rnd); printf(" read thread %2d starting: tid: %s, file: %s\n", @@ -242,7 +245,8 @@ static inline void writer_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) { WT_ITEM *key, _key, *value, _value; - u_int keyno; + uint64_t keyno; + size_t len; int ret; char keybuf[64], valuebuf[64]; @@ -251,12 +255,13 @@ writer_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) keyno = __wt_random(&s->rnd) % nkeys + 1; if (ftype == ROW) { + testutil_check(__wt_snprintf_len_set( + keybuf, sizeof(keybuf), &len, "%017" PRIu64, keyno)); key->data = keybuf; - key->size = (uint32_t) - snprintf(keybuf, sizeof(keybuf), "%017u", keyno); + 
key->size = (uint32_t)len; cursor->set_key(cursor, key); } else - cursor->set_key(cursor, (uint32_t)keyno); + cursor->set_key(cursor, keyno); if (keyno % 5 == 0) { ++s->remove; if ((ret = @@ -268,8 +273,10 @@ writer_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) if (ftype == FIX) cursor->set_value(cursor, 0x10); else { - value->size = (uint32_t)snprintf( - valuebuf, sizeof(valuebuf), "XXX %37u", keyno); + testutil_check(__wt_snprintf_len_set( + valuebuf, sizeof(valuebuf), + &len, "XXX %37" PRIu64, keyno)); + value->size = (uint32_t)len; cursor->set_value(cursor, value); } if ((ret = cursor->update(cursor)) != 0) @@ -277,7 +284,7 @@ writer_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) } if (log_print) testutil_check(session->log_printf(session, - "Writer Thread %p key %017u", pthread_self(), keyno)); + "Writer Thread %p key %017" PRIu64, pthread_self(), keyno)); } /* @@ -296,7 +303,7 @@ writer(void *arg) id = (int)(uintptr_t)arg; s = &run_info[id]; - __wt_thread_id(tid, sizeof(tid)); + testutil_check(__wt_thread_id(tid, sizeof(tid))); __wt_random_init(&s->rnd); printf("write thread %2d starting: tid: %s, file: %s\n", diff --git a/test/thread/stats.c b/test/thread/stats.c index 67a2c02719b..839d65e8a4d 100644 --- a/test/thread/stats.c +++ b/test/thread/stats.c @@ -65,7 +65,8 @@ stats(void) /* File statistics. 
*/ if (!multiple_files) { - (void)snprintf(name, sizeof(name), "statistics:" FNAME, 0); + testutil_check(__wt_snprintf( + name, sizeof(name), "statistics:" FNAME, 0)); if ((ret = session->open_cursor( session, name, NULL, NULL, &cursor)) != 0) testutil_die(ret, "session.open_cursor"); diff --git a/test/thread/t.c b/test/thread/t.c index 9dfd02bdad2..d2ed4c74bb7 100644 --- a/test/thread/t.c +++ b/test/thread/t.c @@ -185,19 +185,15 @@ wt_connect(char *config_open) }; int ret; char config[512]; - size_t print_count; testutil_clean_work_dir(home); testutil_make_work_dir(home); - print_count = (size_t)snprintf(config, sizeof(config), + testutil_check(__wt_snprintf(config, sizeof(config), "create,statistics=(all),error_prefix=\"%s\",%s%s", progname, config_open == NULL ? "" : ",", - config_open == NULL ? "" : config_open); - - if (print_count >= sizeof(config)) - testutil_die(EINVAL, "Config string too long"); + config_open == NULL ? "" : config_open)); if ((ret = wiredtiger_open(home, &event_handler, config, &conn)) != 0) testutil_die(ret, "wiredtiger_open"); diff --git a/test/utility/misc.c b/test/utility/misc.c index 61dad3d76c2..934dac86a7b 100644 --- a/test/utility/misc.c +++ b/test/utility/misc.c @@ -108,14 +108,14 @@ testutil_clean_work_dir(const char *dir) if ((buf = malloc(len)) == NULL) testutil_die(ENOMEM, "Failed to allocate memory"); - snprintf(buf, len, "%s %s %s %s", DIR_EXISTS_COMMAND, dir, - RM_COMMAND, dir); + testutil_check(__wt_snprintf( + buf, len, "%s %s %s %s", DIR_EXISTS_COMMAND, dir, RM_COMMAND, dir)); #else len = strlen(dir) + strlen(RM_COMMAND) + 1; if ((buf = malloc(len)) == NULL) testutil_die(ENOMEM, "Failed to allocate memory"); - snprintf(buf, len, "%s%s", RM_COMMAND, dir); + testutil_check(__wt_snprintf(buf, len, "%s%s", RM_COMMAND, dir)); #endif if ((ret = system(buf)) != 0 && ret != ENOENT) @@ -142,7 +142,7 @@ testutil_make_work_dir(char *dir) testutil_die(ENOMEM, "Failed to allocate memory"); /* mkdir shares syntax between Windows and 
Linux */ - snprintf(buf, len, "%s%s", MKDIR_COMMAND, dir); + testutil_check(__wt_snprintf(buf, len, "%s%s", MKDIR_COMMAND, dir)); if ((ret = system(buf)) != 0) testutil_die(ret, "%s", buf); free(buf); diff --git a/test/utility/parse_opts.c b/test/utility/parse_opts.c index af9256b199a..c3eff3360de 100644 --- a/test/utility/parse_opts.c +++ b/test/utility/parse_opts.c @@ -115,13 +115,15 @@ testutil_parse_opts(int argc, char * const *argv, TEST_OPTS *opts) if (opts->home == NULL) { len = strlen("WT_TEST.") + strlen(opts->progname) + 10; opts->home = dmalloc(len); - snprintf(opts->home, len, "WT_TEST.%s", opts->progname); + testutil_check(__wt_snprintf( + opts->home, len, "WT_TEST.%s", opts->progname)); } /* Setup the default URI string */ len = strlen("table:") + strlen(opts->progname) + 10; opts->uri = dmalloc(len); - snprintf(opts->uri, len, "table:%s", opts->progname); + testutil_check(__wt_snprintf( + opts->uri, len, "table:%s", opts->progname)); return (0); } diff --git a/test/utility/thread.c b/test/utility/thread.c index 38465b2f02b..122ad554442 100644 --- a/test/utility/thread.c +++ b/test/utility/thread.c @@ -57,8 +57,8 @@ thread_append(void *arg) if (opts->table_type == TABLE_FIX) cursor->set_value(cursor, buf[0]); else { - snprintf(buf, sizeof(buf), - "%" PRIu64 " VALUE ------", recno); + testutil_check(__wt_snprintf(buf, sizeof(buf), + "%" PRIu64 " VALUE ------", recno)); cursor->set_value(cursor, buf); } testutil_check(cursor->insert(cursor)); @@ -94,7 +94,8 @@ thread_insert_append(void *arg) session, opts->uri, NULL, NULL, &cursor)); for (i = 0; i < opts->nrecords; ++i) { - snprintf(kbuf, sizeof(kbuf), "%010d KEY------", (int)i); + testutil_check(__wt_snprintf( + kbuf, sizeof(kbuf), "%010d KEY------", (int)i)); cursor->set_key(cursor, kbuf); cursor->set_value(cursor, "========== VALUE ======="); testutil_check(cursor->insert(cursor)); diff --git a/test/windows/windows_shim.h b/test/windows/windows_shim.h index 648b991b1a2..8985904fb19 100644 --- 
a/test/windows/windows_shim.h +++ b/test/windows/windows_shim.h @@ -36,6 +36,8 @@ #include #include +#include "wt_internal.h" + #define inline __inline /* Define some POSIX types */ @@ -52,12 +54,7 @@ typedef int u_int; /* snprintf does not exist on <= VS 2013 */ #if _MSC_VER < 1900 -#define snprintf _wt_snprintf - -_Check_return_opt_ int __cdecl _wt_snprintf( - _Out_writes_(_MaxCount) char * _DstBuf, - _In_ size_t _MaxCount, - _In_z_ _Printf_format_string_ const char * _Format, ...); +#define snprintf __wt_snprintf #endif /* -- cgit v1.2.1 From 0641cc7b36a130111c19c955875862ed989a1beb Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 24 Mar 2017 08:59:59 -0400 Subject: WT-3136 bug fix: WiredTiger doesn't check sprintf calls for error return (#3347) Add a style check for use of the snprintf/vsnprintf calls rather than the WiredTiger library replacements. Fix a wtperf snprintf call I missed. --- bench/wtperf/wtperf.c | 4 ++-- dist/s_style | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 1eedaba4f32..80416cfdd5c 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -2608,8 +2608,8 @@ main(int argc, char *argv[]) append_comma = ","; } if (opts->in_memory) { - pos += (size_t)snprintf( - cc_buf + pos, req_len - pos, "%s%s", + testutil_check(__wt_snprintf_len_incr( + cc_buf + pos, req_len - pos, &pos, "%s%s", append_comma, "in_memory=true"); append_comma = ","; } diff --git a/dist/s_style b/dist/s_style index 8e755224ee2..388a481ef56 100755 --- a/dist/s_style +++ b/dist/s_style @@ -93,6 +93,14 @@ else cat $t fi + if ! expr "$f" : 'examples/c/*' > /dev/null && + ! expr "$f" : 'ext/*' > /dev/null && + ! expr "$f" : 'src/os_posix/os_snprintf.c' > /dev/null && + egrep '[^a-z_]snprintf\(|[^a-z_]vsnprintf\(' $f > $t; then + echo "$f: snprintf call, use WiredTiger library replacements" + cat $t + fi + # Alignment directive before "struct". 
egrep 'WT_COMPILER_TYPE_ALIGN.*struct' $f > $t test -s $t && { -- cgit v1.2.1 From aba8062d15b6a255542e68b5266fcb61aaa2838c Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 24 Mar 2017 09:37:36 -0400 Subject: WT-3136 bug fix: WiredTiger doesn't check sprintf calls for error return (#3348) Fix a typo. --- bench/wtperf/wtperf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 80416cfdd5c..bdc0b0f3b3c 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -2610,7 +2610,7 @@ main(int argc, char *argv[]) if (opts->in_memory) { testutil_check(__wt_snprintf_len_incr( cc_buf + pos, req_len - pos, &pos, "%s%s", - append_comma, "in_memory=true"); + append_comma, "in_memory=true")); append_comma = ","; } if (sess_cfg != NULL && strlen(sess_cfg) != 0) { -- cgit v1.2.1 From e552b240c997dba9434cd3d2d5b563bec7df5b96 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 24 Mar 2017 11:24:02 -0400 Subject: WT-98 Update the current cursor value without a search Revert "Change LSM WT_CURSOR.{compare,insert,update,remove} to accept an internal key instead of copying the key into WiredTiger-owned memory (in other words, replace WT_CURSOR_NEEDKEY calls with WT_CURSOR_CHECKKEY)." This reverts commit af2c787. 
--- src/lsm/lsm_cursor.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 0de39b38370..52265f02e62 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -844,8 +844,8 @@ __clsm_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) WT_ERR_MSG(session, EINVAL, "comparison method cursors must reference the same object"); - WT_CURSOR_CHECKKEY(a); - WT_CURSOR_CHECKKEY(b); + WT_CURSOR_NEEDKEY(a); + WT_CURSOR_NEEDKEY(b); WT_ERR(__wt_compare( session, alsm->lsm_tree->collator, &a->key, &b->key, cmpp)); @@ -1521,7 +1521,7 @@ __clsm_insert(WT_CURSOR *cursor) clsm = (WT_CURSOR_LSM *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, insert, NULL); - WT_CURSOR_CHECKKEY(cursor); + WT_CURSOR_NEEDKEY(cursor); WT_CURSOR_NEEDVALUE(cursor); WT_ERR(__clsm_enter(clsm, false, true)); @@ -1565,7 +1565,7 @@ __clsm_update(WT_CURSOR *cursor) clsm = (WT_CURSOR_LSM *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, update, NULL); - WT_CURSOR_CHECKKEY(cursor); + WT_CURSOR_NEEDKEY(cursor); WT_CURSOR_NEEDVALUE(cursor); WT_ERR(__clsm_enter(clsm, false, true)); @@ -1612,7 +1612,7 @@ __clsm_remove(WT_CURSOR *cursor) positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT); CURSOR_REMOVE_API_CALL(cursor, session, NULL); - WT_CURSOR_CHECKKEY(cursor); + WT_CURSOR_NEEDKEY(cursor); WT_CURSOR_NOVALUE(cursor); WT_ERR(__clsm_enter(clsm, false, true)); -- cgit v1.2.1 From c2bde1ea5a810f47f26fb7a6e70fe9612ea15f1f Mon Sep 17 00:00:00 2001 From: sueloverso Date: Sun, 26 Mar 2017 21:48:14 -0400 Subject: WT-3207 Use config to determine checkpoint force value. 
(#3350) --- src/txn/txn_ckpt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 5ec8aa19e4c..f4ccf5eacd0 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -1599,7 +1599,9 @@ __checkpoint_tree_helper(WT_SESSION_IMPL *session, const char *cfg[]) int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) { + WT_CONFIG_ITEM cval; WT_DECL_RET; + bool force; /* Should not be called with a checkpoint handle. */ WT_ASSERT(session, session->dhandle->checkpoint == NULL); @@ -1608,8 +1610,10 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_ASSERT(session, !WT_IS_METADATA(session->dhandle) || F_ISSET(session, WT_SESSION_LOCKED_METADATA)); + WT_RET(__wt_config_gets_def(session, cfg, "force", 0, &cval)); + force = cval.val != 0; WT_SAVE_DHANDLE(session, ret = __checkpoint_lock_dirty_tree( - session, true, false, true, cfg)); + session, true, force, true, cfg)); WT_RET(ret); if (F_ISSET(S2BT(session), WT_BTREE_SKIP_CKPT)) return (0); -- cgit v1.2.1 From e36d8cdb2748ad5b6713b824bbe7be0c8f11c14d Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 27 Mar 2017 09:18:34 -0400 Subject: WT-3240 Coverity reports (#3354) * WT-3240 Coverity reports Coverity report 1373075: allocated memory is leaked if __wt_snprintf fails. * Coverity report 1373074: allocated memory is leaked if __wt_snprintf fails. * Coverity report 1373073: allocated memory is leaked if __wt_snprintf fails. * Coverity report 1373072: allocated memory is leaked if __wt_snprintf fails. * Coverity report 1373071: allocated memory is leaked if __wt_snprintf fails. * Coverity report 1369053: CID 1369053 (#1 of 1): Unused value (UNUSED_VALUE) assigned_pointer: Assigning value from "," to append_comma here, but that stored value is overwritten before it can be used. 
--- bench/wtperf/wtperf.c | 2 -- src/config/config_api.c | 2 +- src/os_common/filename.c | 18 ++++++++++-------- src/utilities/util_dump.c | 4 +++- src/utilities/util_load_json.c | 8 +++++--- src/utilities/util_main.c | 12 ++++++------ 6 files changed, 25 insertions(+), 21 deletions(-) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index bdc0b0f3b3c..6d79eebe8b2 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -2623,7 +2623,6 @@ main(int argc, char *argv[]) testutil_check(__wt_snprintf_len_incr( cc_buf + pos, req_len - pos, &pos, "%s%s", append_comma, user_cconfig)); - append_comma = ","; } if (strlen(cc_buf) != 0 && (ret = @@ -2658,7 +2657,6 @@ main(int argc, char *argv[]) testutil_check(__wt_snprintf_len_incr( tc_buf + pos, req_len - pos, &pos, "%s%s", append_comma, user_tconfig)); - append_comma = ","; } if (strlen(tc_buf) != 0 && (ret = diff --git a/src/config/config_api.c b/src/config/config_api.c index 88e173459f9..c1299baaafe 100644 --- a/src/config/config_api.c +++ b/src/config/config_api.c @@ -278,8 +278,8 @@ __wt_configure_method(WT_SESSION_IMPL *session, entry->method = (*epp)->method; len = strlen((*epp)->base) + strlen(",") + strlen(config) + 1; WT_ERR(__wt_calloc_def(session, len, &p)); - WT_ERR(__wt_snprintf(p, len, "%s,%s", (*epp)->base, config)); entry->base = p; + WT_ERR(__wt_snprintf(p, len, "%s,%s", (*epp)->base, config)); /* * There may be a default value in the config argument passed in (for diff --git a/src/os_common/filename.c b/src/os_common/filename.c index f803144a3fb..d5695f63d91 100644 --- a/src/os_common/filename.c +++ b/src/os_common/filename.c @@ -29,6 +29,7 @@ int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path) { + WT_DECL_RET; size_t len; char *buf; @@ -39,16 +40,17 @@ __wt_nfilename( * the exists API which is used by the test utilities. 
*/ if (session == NULL || __wt_absolute_path(name)) - WT_RET(__wt_strndup(session, name, namelen, path)); - else { - len = strlen(S2C(session)->home) + 1 + namelen + 1; - WT_RET(__wt_calloc(session, 1, len, &buf)); - WT_RET(__wt_snprintf(buf, len, "%s%s%.*s", S2C(session)->home, - __wt_path_separator(), (int)namelen, name)); - *path = buf; - } + return (__wt_strndup(session, name, namelen, path)); + len = strlen(S2C(session)->home) + 1 + namelen + 1; + WT_RET(__wt_calloc(session, 1, len, &buf)); + WT_ERR(__wt_snprintf(buf, len, "%s%s%.*s", + S2C(session)->home, __wt_path_separator(), (int)namelen, name)); + *path = buf; return (0); + +err: __wt_free(session, buf); + return (ret); } /* diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c index 238e2757099..955148b7d46 100644 --- a/src/utilities/util_dump.c +++ b/src/utilities/util_dump.c @@ -437,8 +437,10 @@ dump_table_parts_config(WT_SESSION *session, WT_CURSOR *cursor, len = strlen(entry) + strlen(name) + 1; if ((uriprefix = malloc(len)) == NULL) return (util_err(session, errno, NULL)); - if ((ret = __wt_snprintf(uriprefix, len, "%s%s", entry, name)) != 0) + if ((ret = __wt_snprintf(uriprefix, len, "%s%s", entry, name)) != 0) { + free(uriprefix); return (util_err(session, ret, NULL)); + } /* * Search the file looking for column group and index key/value pairs: diff --git a/src/utilities/util_load_json.c b/src/utilities/util_load_json.c index af5c2576b26..c693e2b7651 100644 --- a/src/utilities/util_load_json.c +++ b/src/utilities/util_load_json.c @@ -153,13 +153,15 @@ json_kvraw_append(WT_SESSION *session, needsize = strlen(ins->kvraw) + len + 2; if ((tmp = malloc(needsize)) == NULL) return (util_err(session, errno, NULL)); - if ((ret = __wt_snprintf( - tmp, needsize, "%s %.*s", ins->kvraw, (int)len, str)) != 0) - return (util_err(session, ret, NULL)); + WT_ERR(__wt_snprintf( + tmp, needsize, "%s %.*s", ins->kvraw, (int)len, str)); free(ins->kvraw); ins->kvraw = tmp; } return (0); + +err: free(tmp); + 
return (util_err(session, ret, NULL)); } /* diff --git a/src/utilities/util_main.c b/src/utilities/util_main.c index 2b4ef36081a..c6f225bb667 100644 --- a/src/utilities/util_main.c +++ b/src/utilities/util_main.c @@ -326,12 +326,12 @@ util_uri(WT_SESSION *session, const char *s, const char *type) * the default type for the operation. */ if (strchr(s, ':') != NULL) - ret = __wt_snprintf(name, len, "%s", s); + WT_ERR(__wt_snprintf(name, len, "%s", s)); else - ret = __wt_snprintf(name, len, "%s:%s", type, s); - if (ret != 0) { - (void)util_err(session, ret, NULL); - return (NULL); - } + WT_ERR(__wt_snprintf(name, len, "%s:%s", type, s)); return (name); + +err: free(name); + (void)util_err(session, ret, NULL); + return (NULL); } -- cgit v1.2.1 From a5b3166ab7bcdb365b60686246b8e5624efeca84 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 27 Mar 2017 09:44:45 -0400 Subject: SERVER-28168 Cannot start or repair mongodb after unexpected shutdown. (#3353) Panic if there's an error in reading/writing from/to the turtle file, there's no point in continuing. This change avoids user confusion when the turtle file is corrupted or zero'd out by the filesystem. --- src/meta/meta_turtle.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/src/meta/meta_turtle.c b/src/meta/meta_turtle.c index 66e34c728f2..5a089471059 100644 --- a/src/meta/meta_turtle.c +++ b/src/meta/meta_turtle.c @@ -242,7 +242,7 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) WT_DECL_ITEM(buf); WT_DECL_RET; WT_FSTREAM *fs; - bool exist, match; + bool exist; *valuep = NULL; @@ -258,22 +258,19 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) __metadata_config(session, valuep) : WT_NOTFOUND); WT_RET(__wt_fopen(session, WT_METADATA_TURTLE, 0, WT_STREAM_READ, &fs)); - /* Search for the key. */ WT_ERR(__wt_scr_alloc(session, 512, &buf)); - for (match = false;;) { + + /* Search for the key. 
*/ + do { WT_ERR(__wt_getline(session, fs, buf)); if (buf->size == 0) WT_ERR(WT_NOTFOUND); - if (strcmp(key, buf->data) == 0) - match = true; + } while (strcmp(key, buf->data) != 0); - /* Key matched: read the subsequent line for the value. */ - WT_ERR(__wt_getline(session, fs, buf)); - if (buf->size == 0) - WT_ERR(__wt_illegal_value(session, WT_METADATA_TURTLE)); - if (match) - break; - } + /* Key matched: read the subsequent line for the value. */ + WT_ERR(__wt_getline(session, fs, buf)); + if (buf->size == 0) + WT_ERR(WT_NOTFOUND); /* Copy the value for the caller. */ WT_ERR(__wt_strdup(session, buf->data, valuep)); @@ -283,7 +280,12 @@ err: WT_TRET(__wt_fclose(session, &fs)); if (ret != 0) __wt_free(session, *valuep); - return (ret); + + /* + * A file error or a missing key/value pair in the turtle file means + * something has gone horribly wrong -- we're done. + */ + return (ret == 0 ? 0 : __wt_illegal_value(session, WT_METADATA_TURTLE)); } /* @@ -322,5 +324,9 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) err: WT_TRET(__wt_fclose(session, &fs)); WT_TRET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET, false)); - return (ret); + /* + * An error updating the turtle file means something has gone horribly + * wrong -- we're done. + */ + return (ret == 0 ? 0 : __wt_illegal_value(session, WT_METADATA_TURTLE)); } -- cgit v1.2.1 From d5a10d2e97853e7db6bb4c2635b97febf13607c5 Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Tue, 28 Mar 2017 20:08:23 -0400 Subject: WT-3238 Java: Fix Cursor.compare and Cursor.equals to return int values. (#3355) Non-zero int values for these functions should not raise exceptions. 
--- lang/java/Makefile.am | 1 + lang/java/wiredtiger.i | 10 +- test/java/com/wiredtiger/test/CursorTest03.java | 175 +++++++++++++++++++++ test/java/com/wiredtiger/test/WiredTigerSuite.java | 1 + 4 files changed, 186 insertions(+), 1 deletion(-) create mode 100644 test/java/com/wiredtiger/test/CursorTest03.java diff --git a/lang/java/Makefile.am b/lang/java/Makefile.am index 7184fe610dc..2ff822a5d08 100644 --- a/lang/java/Makefile.am +++ b/lang/java/Makefile.am @@ -49,6 +49,7 @@ JAVA_JUNIT = \ $(JAVATEST)/ConcurrentCloseTest.java \ $(JAVATEST)/CursorTest.java \ $(JAVATEST)/CursorTest02.java \ + $(JAVATEST)/CursorTest03.java \ $(JAVATEST)/ExceptionTest.java \ $(JAVATEST)/PackTest.java \ $(JAVATEST)/PackTest02.java \ diff --git a/lang/java/wiredtiger.i b/lang/java/wiredtiger.i index efc512f2f5a..275b708090c 100644 --- a/lang/java/wiredtiger.i +++ b/lang/java/wiredtiger.i @@ -319,6 +319,15 @@ WT_CLASS(struct __wt_async_op, WT_ASYNC_OP, op) %rename (getValueFormat) __wt_async_op::getValue_format; %rename (getType) __wt_async_op::get_type; +/* + * Special cases: override the out typemap, return checking is done in the + * wrapper. + */ +%typemap(out) int __wt_cursor::compare_wrap, + int __wt_cursor::equals_wrap %{ + $result = $1; +%} + /* SWIG magic to turn Java byte strings into data / size. */ %apply (char *STRING, int LENGTH) { (char *data, int size) }; @@ -529,7 +538,6 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; %} %extend __wt_async_op { - %javamethodmodifiers get_key_wrap "protected"; WT_ITEM get_key_wrap(JNIEnv *jenv) { WT_ITEM k; diff --git a/test/java/com/wiredtiger/test/CursorTest03.java b/test/java/com/wiredtiger/test/CursorTest03.java new file mode 100644 index 00000000000..64f33f4d7b6 --- /dev/null +++ b/test/java/com/wiredtiger/test/CursorTest03.java @@ -0,0 +1,175 @@ +/*- + * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. 
+ * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +package com.wiredtiger.test; + +import com.wiredtiger.db.Connection; +import com.wiredtiger.db.Cursor; +import com.wiredtiger.db.SearchStatus; +import com.wiredtiger.db.Session; +import com.wiredtiger.db.WiredTigerPackingException; +import com.wiredtiger.db.WiredTigerException; +import com.wiredtiger.db.wiredtiger; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; +import org.junit.Assert; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/* + * Test cases for WT-3238. + * + * Most WiredTiger methods return int, and our SWIG typemaps for Java add + * checking that throws exceptions for non-zero returns. Certain methods + * (Cursor.compare, Cursor.equals) are declared as returning int in Java, + * but should not throw exceptions for normal returns (which may be + * non-zero). 
+ */ +public class CursorTest03 { + Connection conn; + Session s; + static String values[] = { "key0", "key1" }; + + @Test + public void cursor_int_methods() + throws WiredTigerPackingException { + setup(); + + Cursor c1 = s.open_cursor("table:t", null, null); + Cursor c2 = s.open_cursor("table:t", null, null); + for (String s : values) { + c1.putKeyString(s); + c1.putValueString(s); + c1.insert(); + } + c1.reset(); + + // "key1" compared to "key1" + c1.putKeyString(values[1]); + Assert.assertEquals(c1.search_near(), SearchStatus.FOUND); + c2.putKeyString(values[1]); + Assert.assertEquals(c2.search_near(), SearchStatus.FOUND); + Assert.assertEquals(c1.compare(c2), 0); + Assert.assertEquals(c2.compare(c1), 0); + Assert.assertEquals(c1.compare(c1), 0); + Assert.assertEquals(c1.equals(c2), 1); + Assert.assertEquals(c2.equals(c1), 1); + Assert.assertEquals(c1.equals(c1), 1); + + // "key0" compared to "key1" + c1.putKeyString(values[0]); + Assert.assertEquals(c1.search_near(), SearchStatus.FOUND); + Assert.assertEquals(c1.compare(c2), -1); + Assert.assertEquals(c2.compare(c1), 1); + Assert.assertEquals(c1.equals(c2), 0); + Assert.assertEquals(c2.equals(c1), 0); + + c1.close(); + c2.close(); + teardown(); + } + + public void expectException(Cursor c1, Cursor c2) + { + boolean caught = false; + try { + c1.compare(c2); + } + catch (WiredTigerException wte) { + caught = true; + } + Assert.assertTrue(caught); + + caught = false; + try { + c1.equals(c2); + } + catch (WiredTigerException wte) { + caught = true; + } + Assert.assertTrue(caught); + } + + @Test + public void cursor_int_methods_errors() + throws WiredTigerPackingException { + setup(); + + Cursor c1 = s.open_cursor("table:t", null, null); + Cursor c2 = s.open_cursor("table:t", null, null); + Cursor cx = s.open_cursor("table:t2", null, null); + for (String s : values) { + c1.putKeyString(s); + c1.putValueString(s); + c1.insert(); + cx.putKeyString(s); + cx.putValueString(s); + cx.insert(); + } + c1.reset(); + 
cx.reset(); + + // With both cursors not set, should be an exception. + expectException(c1, c2); + expectException(c1, c2); + + // With any one cursor not set, should be an exception. + c1.putKeyString(values[1]); + Assert.assertEquals(c1.search_near(), SearchStatus.FOUND); + expectException(c1, c2); + expectException(c1, c2); + + // With two cursors from different tables, should be an exception. + cx.putKeyString(values[1]); + Assert.assertEquals(cx.search_near(), SearchStatus.FOUND); + expectException(c1, cx); + expectException(c1, cx); + + c1.close(); + c2.close(); + cx.close(); + teardown(); + } + + private void setup() { + conn = wiredtiger.open("WT_HOME", "create"); + s = conn.open_session(null); + s.create("table:t", "key_format=S,value_format=S"); + s.create("table:t2", "key_format=S,value_format=S"); + } + + private void teardown() { + s.drop("table:t", ""); + s.drop("table:t2", ""); + s.close(""); + conn.close(""); + } + +} + diff --git a/test/java/com/wiredtiger/test/WiredTigerSuite.java b/test/java/com/wiredtiger/test/WiredTigerSuite.java index 5bd98d53fac..9322d30671a 100644 --- a/test/java/com/wiredtiger/test/WiredTigerSuite.java +++ b/test/java/com/wiredtiger/test/WiredTigerSuite.java @@ -38,6 +38,7 @@ import org.junit.runners.Suite; ConfigTest.class, CursorTest.class, CursorTest02.class, + CursorTest03.class, ExceptionTest.class, PackTest.class, PackTest02.class, -- cgit v1.2.1 From 1c41c7735b3529521b7bd34180f80584caee7f59 Mon Sep 17 00:00:00 2001 From: Sulabh Mahajan Date: Wed, 29 Mar 2017 15:38:35 +1100 Subject: WT-2439 Improve page layout: keep pages more than half full (#3277) * Changes `split_pct` to have a minimum of 50%. 
--- dist/api_data.py | 4 +- src/btree/bt_handle.c | 9 +- src/config/config_def.c | 16 +- src/include/btree.h | 6 + src/include/wiredtiger.in | 4 +- src/reconcile/rec_write.c | 954 ++++++++++++++++++++++++---------------------- test/format/config.h | 2 +- 7 files changed, 532 insertions(+), 463 deletions(-) diff --git a/dist/api_data.py b/dist/api_data.py index 1d669fa7fe0..22600dd5e29 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -295,12 +295,12 @@ file_config = format_meta + file_runtime_config + [ Config('split_deepen_per_child', '0', r''' entries allocated per child when deepening the tree''', type='int', undoc=True), - Config('split_pct', '75', r''' + Config('split_pct', '90', r''' the Btree page split size as a percentage of the maximum Btree page size, that is, when a Btree page is split, it will be split into smaller pages, where each page is the specified percentage of the maximum Btree page size''', - min='25', max='100'), + min='50', max='100'), ] # File metadata, including both configurable and non-configurable (internal) diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index f2bffee06da..98c246fb897 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -780,9 +780,16 @@ __btree_page_sizes(WT_SESSION_IMPL *session) * Get the split percentage (reconciliation splits pages into smaller * than the maximum page size chunks so we don't split every time a * new entry is added). Determine how large newly split pages will be. + * Set to the minimum, if the read value is less than that. 
*/ WT_RET(__wt_config_gets(session, cfg, "split_pct", &cval)); - btree->split_pct = (int)cval.val; + if (cval.val < WT_BTREE_MIN_SPLIT_PCT) { + btree->split_pct = WT_BTREE_MIN_SPLIT_PCT; + WT_RET(__wt_msg(session, + "Re-setting split_pct for %s to the minimum allowed of " + "%d%%.", session->dhandle->name, WT_BTREE_MIN_SPLIT_PCT)); + } else + btree->split_pct = (int)cval.val; intl_split_size = __wt_split_page_size(btree, btree->maxintlpage); leaf_split_size = __wt_split_page_size(btree, btree->maxleafpage); diff --git a/src/config/config_def.c b/src/config/config_def.c index b11a8d63fdb..f152fbacad4 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -294,7 +294,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = { { "source", "string", NULL, NULL, NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, { "type", "string", NULL, NULL, NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, @@ -466,7 +466,7 @@ static const WT_CONFIG_CHECK confchk_file_config[] = { { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, NULL, 0 }, @@ -530,7 +530,7 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = { { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, NULL, 0 
}, @@ -614,7 +614,7 @@ static const WT_CONFIG_CHECK confchk_lsm_meta[] = { { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, NULL, 0 }, @@ -1119,7 +1119,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "chunk_size=10MB,merge_max=15,merge_min=0),memory_page_max=5MB," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,source=,split_deepen_min_child=0," - "split_deepen_per_child=0,split_pct=75,type=file,value_format=u", + "split_deepen_per_child=0,split_pct=90,type=file,value_format=u", confchk_WT_SESSION_create, 42 }, { "WT_SESSION.drop", @@ -1213,7 +1213,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "leaf_value_max=0,log=(enabled=true),memory_page_max=5MB," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,split_deepen_min_child=0," - "split_deepen_per_child=0,split_pct=75,value_format=u", + "split_deepen_per_child=0,split_pct=90,value_format=u", confchk_file_config, 35 }, { "file.meta", @@ -1228,7 +1228,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=0," "log=(enabled=true),memory_page_max=5MB,os_cache_dirty_max=0," "os_cache_max=0,prefix_compression=false,prefix_compression_min=4" - ",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75," + ",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90," "value_format=u,version=(major=0,minor=0)", confchk_file_meta, 39 }, @@ -1253,7 +1253,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "merge_min=0),memory_page_max=5MB,old_chunks=," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,split_deepen_min_child=0," - 
"split_deepen_per_child=0,split_pct=75,value_format=u", + "split_deepen_per_child=0,split_pct=90,value_format=u", confchk_lsm_meta, 39 }, { "table.meta", diff --git a/src/include/btree.h b/src/include/btree.h index 88312f408cc..28fe1b94b23 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -57,6 +57,12 @@ /* Evict pages if we see this many consecutive deleted records. */ #define WT_BTREE_DELETE_THRESHOLD 1000 +/* + * Minimum size of the chunks (in percentage of the page size) a page gets split + * into during reconciliation. + */ +#define WT_BTREE_MIN_SPLIT_PCT 50 + /* * WT_BTREE -- * A btree handle. diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 558e93d3de0..707159ef6ae 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1242,8 +1242,8 @@ struct __wt_session { * @config{split_pct, the Btree page split size as a percentage of the * maximum Btree page size\, that is\, when a Btree page is split\, it * will be split into smaller pages\, where each page is the specified - * percentage of the maximum Btree page size., an integer between 25 and - * 100; default \c 75.} + * percentage of the maximum Btree page size., an integer between 50 and + * 100; default \c 90.} * @config{type, set the type of data source used to store a column * group\, index or simple table. By default\, a \c "file:" URI is * derived from the object name. 
The \c type configuration can be used diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 23f654caa70..e18d44f96ff 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -26,6 +26,11 @@ typedef struct { uint32_t flags; /* Caller's configuration */ WT_ITEM disk_image; /* Temporary disk-image buffer */ + /* + * Temporary buffer used to write out a disk image when managing two + * chunks worth of data in memory + */ + WT_ITEM *interim_buf; /* * Track start/stop write generation to decide if all changes to the @@ -127,6 +132,7 @@ typedef struct { * repeatedly split a packed page. */ uint32_t split_size; /* Split page size */ + uint32_t min_split_size; /* Minimum split page size */ /* * The problem with splits is we've done a lot of work by the time we @@ -151,16 +157,6 @@ typedef struct { */ size_t offset; /* Split's first byte */ - /* - * The recno and entries fields are the starting record number - * of the split chunk (for column-store splits), and the number - * of entries in the split chunk. These fields are used both - * to write the split chunk, and to create a new internal page - * to reference the split pages. - */ - uint64_t recno; /* Split's starting record */ - uint32_t entries; /* Split's entries */ - WT_ADDR addr; /* Split's written location */ uint32_t size; /* Split's size */ uint32_t checksum; /* Split's checksum */ @@ -182,39 +178,42 @@ typedef struct { size_t supd_allocated; /* + * While reconciling pages, at any given time, we maintain two + * split chunks in the memory to be written out as pages. As we + * get to the last two chunks, if the last one turns out to be + * smaller than the minimum split size, we go back into the + * penultimate chunk and split at this minimum split size + * boundary. This moves some data from the penultimate chunk to + * the last chunk, hence increasing the size of the last page + * written without decreasing the penultimate page size beyond + * the minimum split size. 
For this reason, we maintain both a + * maximum split percentage boundary and a minimum split + * percentage boundary. + * + * The recno and entries fields are the starting record number + * of the split chunk (for column-store splits), and the number + * of entries in the split chunk. These fields are used both to + * write the split chunk, and to create a new internal page to + * reference the split pages. + * * The key for a row-store page; no column-store key is needed * because the page's recno, stored in the recno field, is the * column-store key. */ - WT_ITEM key; /* Promoted row-store key */ + uint32_t max_bnd_entries; + uint64_t max_bnd_recno; + WT_ITEM max_bnd_key; + + size_t min_bnd_offset; + uint32_t min_bnd_entries; + uint64_t min_bnd_recno; + WT_ITEM min_bnd_key; } *bnd; /* Saved boundaries */ uint32_t bnd_next; /* Next boundary slot */ uint32_t bnd_next_max; /* Maximum boundary slots used */ size_t bnd_entries; /* Total boundary slots */ size_t bnd_allocated; /* Bytes allocated */ - /* - * We track the total number of page entries copied into split chunks - * so we can easily figure out how many entries in the current split - * chunk. - */ - uint32_t total_entries; /* Total entries in splits */ - - /* - * And there's state information as to where in this process we are: - * (1) tracking split boundaries because we can still fit more split - * chunks into the maximum page size, (2) tracking the maximum page - * size boundary because we can't fit any more split chunks into the - * maximum page size, (3) not performing boundary checks because it's - * either not useful with the current page size configuration, or - * because we've already been forced to split. 
- */ - enum { SPLIT_BOUNDARY=0, /* Next: a split page boundary */ - SPLIT_MAX=1, /* Next: the maximum page boundary */ - SPLIT_TRACKING_OFF=2, /* No boundary checks */ - SPLIT_TRACKING_RAW=3 } /* Underlying compression decides */ - bnd_state; - /* * We track current information about the current record number, the * number of entries copied into the temporary buffer, where we are @@ -293,6 +292,14 @@ typedef struct { uint32_t tested_ref_state; /* Debugging information */ } WT_RECONCILE; +#define WT_CROSSING_MIN_BND(r, next_len) \ + ((r)->bnd[(r)->bnd_next].min_bnd_offset == 0 && \ + ((r)->space_avail - (next_len)) < \ + ((r)->split_size - (r)->min_split_size)) +#define WT_CROSSING_SPLIT_BND(r, next_len) ((next_len) > (r)->space_avail) +#define WT_CHECK_CROSSING_BND(r, next_len) \ + (WT_CROSSING_MIN_BND(r, next_len) || WT_CROSSING_SPLIT_BND(r, next_len)) + static void __rec_bnd_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *, bool); static void __rec_cell_build_addr(WT_SESSION_IMPL *, WT_RECONCILE *, const void *, size_t, u_int, uint64_t); @@ -314,6 +321,7 @@ static int __rec_col_var(WT_SESSION_IMPL *, static int __rec_col_var_helper(WT_SESSION_IMPL *, WT_RECONCILE *, WT_SALVAGE_COOKIE *, WT_ITEM *, bool, uint8_t, uint64_t); static int __rec_destroy_session(WT_SESSION_IMPL *); +static uint32_t __rec_min_split_page_size(WT_BTREE *, uint32_t); static int __rec_root_write(WT_SESSION_IMPL *, WT_PAGE *, uint32_t); static int __rec_row_int(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_row_leaf(WT_SESSION_IMPL *, @@ -323,7 +331,6 @@ static int __rec_row_leaf_insert( static int __rec_row_merge(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_split_col(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_split_discard(WT_SESSION_IMPL *, WT_PAGE *); -static int __rec_split_fixup(WT_SESSION_IMPL *, WT_RECONCILE *); static int __rec_split_row(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_split_row_promote( WT_SESSION_IMPL 
*, WT_RECONCILE *, WT_ITEM *, uint8_t); @@ -968,6 +975,7 @@ __rec_destroy(WT_SESSION_IMPL *session, void *reconcilep) *(WT_RECONCILE **)reconcilep = NULL; __wt_buf_free(session, &r->disk_image); + __wt_scr_free(session, &r->interim_buf); __wt_free(session, r->raw_entries); __wt_free(session, r->raw_offsets); @@ -1032,7 +1040,8 @@ __rec_bnd_cleanup(WT_SESSION_IMPL *session, WT_RECONCILE *r, bool destroy) __wt_free(session, bnd->addr.addr); __wt_free(session, bnd->disk_image); __wt_free(session, bnd->supd); - __wt_buf_free(session, &bnd->key); + __wt_buf_free(session, &bnd->max_bnd_key); + __wt_buf_free(session, &bnd->min_bnd_key); } __wt_free(session, r->bnd); r->bnd_next = 0; @@ -1927,8 +1936,8 @@ static void __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd) { bnd->offset = 0; - bnd->recno = WT_RECNO_OOB; - bnd->entries = 0; + bnd->max_bnd_recno = WT_RECNO_OOB; + bnd->max_bnd_entries = 0; __wt_free(session, bnd->addr.addr); WT_CLEAR(bnd->addr); @@ -1943,6 +1952,10 @@ __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd) bnd->already_compressed = false; + bnd->min_bnd_offset = 0; + bnd->min_bnd_entries = 0; + bnd->min_bnd_recno = WT_RECNO_OOB; + /* * Don't touch the key, we re-use that memory in each new * reconciliation. @@ -1974,39 +1987,63 @@ __rec_split_bnd_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r) } /* - * __wt_split_page_size -- - * Split page size calculation: we don't want to repeatedly split every - * time a new entry is added, so we split to a smaller-than-maximum page size. + * __rec_split_page_size_from_pct -- + * Given a split percentage, calculate split page size in bytes. 
*/ -uint32_t -__wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) -{ +static uint32_t +__rec_split_page_size_from_pct( + int split_pct, uint32_t maxpagesize, uint32_t allocsize) { uintmax_t a; uint32_t split_size; /* * Ideally, the split page size is some percentage of the maximum page - * size rounded to an allocation unit (round to an allocation unit so - * we don't waste space when we write). + * size rounded to an allocation unit (round to an allocation unit so we + * don't waste space when we write). */ a = maxpagesize; /* Don't overflow. */ split_size = (uint32_t)WT_ALIGN_NEAREST( - (a * (u_int)btree->split_pct) / 100, btree->allocsize); + (a * (u_int)split_pct) / 100, allocsize); /* - * Respect the configured split percentage if the calculated split - * size is either zero or a full page. The user has either configured - * an allocation size that matches the page size, or a split - * percentage that is close to zero or one hundred. Rounding is going - * to provide a worse outcome than having a split point that doesn't - * fall on an allocation size boundary in those cases. + * Respect the configured split percentage if the calculated split size + * is either zero or a full page. The user has either configured an + * allocation size that matches the page size, or a split percentage + * that is close to zero or one hundred. Rounding is going to provide a + * worse outcome than having a split point that doesn't fall on an + * allocation size boundary in those cases. */ if (split_size == 0 || split_size == maxpagesize) - split_size = (uint32_t)((a * (u_int)btree->split_pct) / 100); + split_size = (uint32_t)((a * (u_int)split_pct) / 100); return (split_size); } +/* + * __wt_split_page_size -- + * Split page size calculation: we don't want to repeatedly split every + * time a new entry is added, so we split to a smaller-than-maximum page size. 
+ */ +uint32_t +__wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) +{ + return (__rec_split_page_size_from_pct( + btree->split_pct, maxpagesize, btree->allocsize)); +} + +/* + * __rec_min_split_page_size -- + * Minimum split size boundary calculation: To track a boundary at the + * minimum split size that we could have split at instead of splitting at + * the split page size. + */ +static uint32_t +__rec_min_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) +{ + return (__rec_split_page_size_from_pct( + WT_BTREE_MIN_SPLIT_PCT, maxpagesize, btree->allocsize)); +} + /* * __rec_split_init -- * Initialization for the reconciliation split functions. @@ -2018,7 +2055,7 @@ __rec_split_init(WT_SESSION_IMPL *session, WT_BM *bm; WT_BTREE *btree; WT_PAGE_HEADER *dsk; - size_t corrected_page_size; + size_t corrected_page_size, disk_img_buf_size; btree = S2BT(session); bm = btree->bm; @@ -2053,33 +2090,6 @@ __rec_split_init(WT_SESSION_IMPL *session, r->max_raw_page_size = r->page_size = (uint32_t)WT_MIN(r->page_size * 10, WT_MAX(r->page_size, btree->maxmempage / 2)); - - /* - * Ensure the disk image buffer is large enough for the max object, as - * corrected by the underlying block manager. - */ - corrected_page_size = r->page_size; - WT_RET(bm->write_size(bm, session, &corrected_page_size)); - WT_RET(__wt_buf_init(session, &r->disk_image, corrected_page_size)); - - /* - * Clear the disk page header to ensure all of it is initialized, even - * the unused fields. - * - * In the case of fixed-length column-store, clear the entire buffer: - * fixed-length column-store sets bits in bytes, where the bytes are - * assumed to initially be 0. - */ - memset(r->disk_image.mem, 0, page->type == WT_PAGE_COL_FIX ? - corrected_page_size : WT_PAGE_HEADER_SIZE); - - /* - * Set the page type (the type doesn't change, and setting it later - * would require additional code in a few different places). 
- */ - dsk = r->disk_image.mem; - dsk->type = page->type; - /* * If we have to split, we want to choose a smaller page size for the * split pages, because otherwise we could end up splitting one large @@ -2099,22 +2109,28 @@ __rec_split_init(WT_SESSION_IMPL *session, * creating overflow items and compacted data, for example, as those * items have already been written to disk). So, the loop calls the * helper functions when approaching a split boundary, and we save the - * information at that point. That allows us to go back and split the - * page at the boundary points if we eventually overflow the maximum - * page size. + * information at that point. We also save the boundary information at + * the minimum split size. We maintain two chunks (each boundary + * represents a chunk that gets written as a page) in the memory, + * writing out the older one to the disk as a page when we need to make + * space for a new chunk. On reaching the last chunk, if it turns out to + * be smaller than the minimum split size, we go back into the + * penultimate chunk and split at this minimum split size boundary. This + * moves some data from the penultimate chunk to the last chunk, hence + * increasing the size of the last page written without decreasing the + * penultimate page size beyond the minimum split size. * * Finally, all this doesn't matter for fixed-size column-store pages, * raw compression, and salvage. Fixed-size column store pages can * split under (very) rare circumstances, but they're allocated at a * fixed page size, never anything smaller. In raw compression, the - * underlying compression routine decides when we split, so it's not - * our problem. In salvage, as noted above, we can't split at all. + * underlying compression routine decides when we split, so it's not our + * problem. In salvage, as noted above, we can't split at all. 
*/ if (r->raw_compression || r->salvage != NULL) { r->split_size = 0; r->space_avail = r->page_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - } - else if (page->type == WT_PAGE_COL_FIX) { + } else if (page->type == WT_PAGE_COL_FIX) { r->split_size = r->page_size; r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); @@ -2122,32 +2138,53 @@ __rec_split_init(WT_SESSION_IMPL *session, r->split_size = __wt_split_page_size(btree, r->page_size); r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); + r->min_split_size = + __rec_min_split_page_size(btree, r->page_size); } + + /* + * Ensure the disk image buffer is large enough for the max object, as + * corrected by the underlying block manager. + * + * The buffer that we build disk image in, needs to hold two chunks + * worth of data. Since we want to support split_size more than the page + * size (to allow for adjustments based on the compression), this buffer + * should be greater of twice of split_size and page_size. + */ + corrected_page_size = r->page_size; + disk_img_buf_size = 2 * WT_MAX(corrected_page_size, r->split_size); + WT_RET(bm->write_size(bm, session, &corrected_page_size)); + WT_RET(__wt_buf_init(session, &r->disk_image, disk_img_buf_size)); + + /* + * Clear the disk page header to ensure all of it is initialized, even + * the unused fields. + * + * In the case of fixed-length column-store, clear the entire buffer: + * fixed-length column-store sets bits in bytes, where the bytes are + * assumed to initially be 0. + */ + memset(r->disk_image.mem, 0, page->type == WT_PAGE_COL_FIX ? + disk_img_buf_size : WT_PAGE_HEADER_SIZE); + + /* + * Set the page type (the type doesn't change, and setting it later + * would require additional code in a few different places). + */ + dsk = r->disk_image.mem; + dsk->type = page->type; + r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); /* Initialize the first boundary. 
*/ r->bnd_next = 0; WT_RET(__rec_split_bnd_grow(session, r)); __rec_split_bnd_init(session, &r->bnd[0]); - r->bnd[0].recno = recno; + r->bnd[0].max_bnd_recno = recno; r->bnd[0].offset = WT_PAGE_HEADER_BYTE_SIZE(btree); - /* - * If the maximum page size is the same as the split page size, either - * because of the object type or application configuration, there isn't - * any need to maintain split boundaries within a larger page. - * - * No configuration for salvage here, because salvage can't split. - */ - if (r->raw_compression) - r->bnd_state = SPLIT_TRACKING_RAW; - else if (max == r->split_size) - r->bnd_state = SPLIT_TRACKING_OFF; - else - r->bnd_state = SPLIT_BOUNDARY; - - /* Initialize the entry counters. */ - r->entries = r->total_entries = 0; + /* Initialize the entry counter. */ + r->entries = 0; /* Initialize the starting record number. */ r->recno = recno; @@ -2350,19 +2387,112 @@ __rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len) { WT_BM *bm; WT_BTREE *btree; - size_t corrected_page_size, len; + size_t corrected_page_size, inuse, len; btree = S2BT(session); bm = btree->bm; len = WT_PTRDIFF(r->first_free, r->disk_image.mem); - corrected_page_size = len + add_len; + inuse = (len - r->bnd[r->bnd_next].offset) + + WT_PAGE_HEADER_BYTE_SIZE(btree); + corrected_page_size = inuse + add_len; + WT_RET(bm->write_size(bm, session, &corrected_page_size)); - WT_RET(__wt_buf_grow(session, &r->disk_image, corrected_page_size)); + /* Need to account for buffer carrying two chunks worth of data */ + WT_RET(__wt_buf_grow(session, &r->disk_image, 2 * corrected_page_size)); + r->first_free = (uint8_t *)r->disk_image.mem + len; - WT_ASSERT(session, corrected_page_size >= len); - r->space_avail = corrected_page_size - len; + WT_ASSERT(session, corrected_page_size >= inuse); + r->space_avail = corrected_page_size - inuse; WT_ASSERT(session, r->space_avail >= add_len); + + return (0); +} + +/* + * __rec_split_write_prev_and_shift_cur -- + * Write the 
previous split chunk to the disk as a page. Shift the contents + * of the current chunk to the start of the buffer, making space for a new + * chunk to be written. + * If the caller asks for a chunk resizing, the boundary between the two + * chunks is readjusted to the minimum split size boundary details stored + * in the previous chunk, letting the current chunk grow at the cost of the + * previous chunk. + */ +static int +__rec_split_write_prev_and_shift_cur( + WT_SESSION_IMPL *session, WT_RECONCILE *r, bool resize_chunks) +{ + WT_BM *bm; + WT_BOUNDARY *bnd_cur, *bnd_prev; + WT_BTREE *btree; + WT_PAGE_HEADER *dsk, *dsk_tmp; + size_t cur_len, len; + uint8_t *dsk_start; + + WT_ASSERT(session, r->bnd_next != 0); + + btree = S2BT(session); + bm = btree->bm; + bnd_cur = &r->bnd[r->bnd_next]; + bnd_prev = bnd_cur - 1; + dsk = r->disk_image.mem; + cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset; + + /* + * Resize chunks if the current is smaller than the minimum, and there + * are details on the minimum split size boundary available in the + * previous boundary details. + * + * There is a possibility that we do not have a minimum boundary set, in + * such a case we skip chunk resizing. Such a condition is possible for + * instance when we are building the image in the buffer and the first + * K/V pair is large enough that it surpasses both the minimum split + * size and the split size the application has set. In such a case we + * split the chunk without saving any minimum boundary. 
+ */ + if (resize_chunks && + cur_len < r->min_split_size && bnd_prev->min_bnd_offset != 0) { + bnd_cur->offset = bnd_prev->min_bnd_offset; + bnd_cur->max_bnd_entries += + bnd_prev->max_bnd_entries - bnd_prev->min_bnd_entries; + bnd_prev->max_bnd_entries = bnd_prev->min_bnd_entries; + bnd_cur->max_bnd_recno = bnd_prev->min_bnd_recno; + + WT_RET(__wt_buf_set(session, &bnd_cur->max_bnd_key, + bnd_prev->min_bnd_key.data, bnd_prev->min_bnd_key.size)); + + /* Update current chunk's length */ + cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset; + } + + /* + * Create an interim buffer if not already done to prepare the previous + * chunk's disk image. + */ + len = bnd_cur->offset; + WT_RET(bm->write_size(bm, session, &len)); + if (r->interim_buf == NULL) + WT_RET(__wt_scr_alloc(session, len, &r->interim_buf)); + else + WT_RET(__wt_buf_init(session, r->interim_buf, len)); + + dsk_tmp = r->interim_buf->mem; + memcpy(dsk_tmp, dsk, bnd_cur->offset); + dsk_tmp->recno = bnd_prev->max_bnd_recno; + dsk_tmp->u.entries = bnd_prev->max_bnd_entries; + dsk_tmp->mem_size = WT_STORE_SIZE(bnd_cur->offset); + r->interim_buf->size = dsk_tmp->mem_size; + WT_RET(__rec_split_write(session, r, bnd_prev, r->interim_buf, false)); + + /* Shift the current chunk to the start of the buffer */ + dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); + (void)memmove(dsk_start, (uint8_t *)dsk + bnd_cur->offset, cur_len); + + /* Fix boundary offset */ + bnd_cur->offset = WT_PAGE_HEADER_BYTE_SIZE(btree); + /* Fix where free points */ + r->first_free = dsk_start + cur_len; return (0); } @@ -2382,6 +2512,9 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) btree = S2BT(session); dsk = r->disk_image.mem; + /* Fixed length col store can call with next_len 0 */ + WT_ASSERT(session, next_len == 0 || r->space_avail < next_len); + /* * We should never split during salvage, and we're about to drop core * because there's no parent page. 
@@ -2391,147 +2524,58 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) "%s page too large, attempted split during salvage", __wt_page_type_string(r->page->type)); - /* Hitting a page boundary resets the dictionary, in all cases. */ - __rec_dictionary_reset(r); - - inuse = WT_PTRDIFF(r->first_free, dsk); - switch (r->bnd_state) { - case SPLIT_BOUNDARY: - /* - * We can get here if the first key/value pair won't fit. - * Additionally, grow the buffer to contain the current item if - * we haven't already consumed a reasonable portion of a split - * chunk. - */ - if (inuse < r->split_size / 2) - break; - - /* - * About to cross a split boundary but not yet forced to split - * into multiple pages. If we have to split, this is one of the - * split points, save information about where we are when the - * split would have happened. - */ - WT_RET(__rec_split_bnd_grow(session, r)); - last = &r->bnd[r->bnd_next++]; - next = last + 1; - - /* Set the number of entries for the just finished chunk. */ - last->entries = r->entries - r->total_entries; - r->total_entries = r->entries; - - /* Set the key for the next chunk. */ - next->recno = r->recno; - if (dsk->type == WT_PAGE_ROW_INT || - dsk->type == WT_PAGE_ROW_LEAF) - WT_RET(__rec_split_row_promote( - session, r, &next->key, dsk->type)); - - /* - * Set the starting buffer offset and clear the entries (the - * latter not required, but cleaner). - */ - next->offset = WT_PTRDIFF(r->first_free, dsk); - next->entries = 0; - - /* Set the space available to another split-size chunk. */ - r->space_avail = - r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - - /* - * Adjust the space available to handle two cases: - * - We don't have enough room for another full split-size - * chunk on the page. - * - We chose to fill past a page boundary because of a - * large item. - */ - if (inuse + r->space_avail > r->page_size) { - r->space_avail = - r->page_size > inuse ? 
(r->page_size - inuse) : 0; - - /* There are no further boundary points. */ - r->bnd_state = SPLIT_MAX; - } - - /* - * Return if the next object fits into this page, else we have - * to split the page. - */ - if (r->space_avail >= next_len) - return (0); - - /* FALLTHROUGH */ - case SPLIT_MAX: - /* - * We're going to have to split and create multiple pages. - * - * Cycle through the saved split-point information, writing the - * split chunks we have tracked. The underlying fixup function - * sets the space available and other information, and copied - * any unwritten chunk of data to the beginning of the buffer. - */ - WT_RET(__rec_split_fixup(session, r)); - - /* We're done saving split chunks. */ - r->bnd_state = SPLIT_TRACKING_OFF; - break; - case SPLIT_TRACKING_OFF: - /* - * We can get here if the first key/value pair won't fit. - * Additionally, grow the buffer to contain the current item if - * we haven't already consumed a reasonable portion of a split - * chunk. - */ - if (inuse < r->split_size / 2) - break; + last = &r->bnd[r->bnd_next]; + inuse = (WT_PTRDIFF(r->first_free, dsk) - last->offset) + + WT_PAGE_HEADER_BYTE_SIZE(btree); - /* - * The key/value pairs didn't fit into a single page, but either - * we've already noticed that and are now processing the rest of - * the pairs at split size boundaries, or the split size was the - * same as the page size, and we never bothered with split point - * information at all. - */ - WT_RET(__rec_split_bnd_grow(session, r)); - last = &r->bnd[r->bnd_next++]; - next = last + 1; + /* + * We can get here if the first key/value pair won't fit. + * Additionally, grow the buffer to contain the current item if we + * haven't already consumed a reasonable portion of a split chunk. + */ + if (inuse < r->split_size / 2) + goto done; - /* - * Set the key for the next chunk (before writing the block, a - * key range is needed in that code). 
- */ - next->recno = r->recno; - if (dsk->type == WT_PAGE_ROW_INT || - dsk->type == WT_PAGE_ROW_LEAF) - WT_RET(__rec_split_row_promote( - session, r, &next->key, dsk->type)); + /* Hitting a page boundary resets the dictionary, in all cases. */ + __rec_dictionary_reset(r); - /* Clear the entries (not required, but cleaner). */ - next->entries = 0; + /* Set the number of entries for the just finished chunk. */ + last->max_bnd_entries = r->entries; - /* Finalize the header information and write the page. */ - dsk->recno = last->recno; - dsk->u.entries = r->entries; - dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); + /* + * In case of bulk load, write out chunks as we get them. + * In other cases, we keep two chunks in memory at a given time. So, if + * there is a previous chunk, write it out, making space in the buffer + * for the next chunk to be written. + */ + if (r->is_bulk_load) { + dsk->recno = last->max_bnd_recno; + dsk->u.entries = last->max_bnd_entries; + dsk->mem_size = (uint32_t)inuse; r->disk_image.size = dsk->mem_size; - WT_RET( - __rec_split_write(session, r, last, &r->disk_image, false)); - - /* - * Set the caller's entry count and buffer information for the - * next chunk. We only get here if we're not splitting or have - * already split, so it's split-size chunks from here on out. - */ - r->entries = 0; + WT_RET(__rec_split_write( + session, r, last, &r->disk_image, false)); + /* Fix where free points */ r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); - r->space_avail = - r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - break; - case SPLIT_TRACKING_RAW: - return (__wt_illegal_value(session, NULL)); - } + } else if (r->bnd_next != 0) + WT_RET(__rec_split_write_prev_and_shift_cur(session, r, false)); - /* + /* Prepare the next boundary */ + WT_RET(__rec_split_bnd_grow(session, r)); + r->bnd_next++; + next = &r->bnd[r->bnd_next]; + next->offset = WT_PTRDIFF(r->first_free, dsk); + /* Set the key for the next chunk. 
*/ + next->max_bnd_recno = r->recno; + if (dsk->type == WT_PAGE_ROW_INT || dsk->type == WT_PAGE_ROW_LEAF) + WT_RET(__rec_split_row_promote( + session, r, &next->max_bnd_key, dsk->type)); + + r->entries = 0; + /* Set the space available to another split-size chunk. */ + r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); + +done: /* * Overflow values can be larger than the maximum page size but still be * "on-page". If the next key/value pair is larger than space available * after a split has happened (in other words, larger than the maximum @@ -2548,6 +2592,66 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) return (0); } +/* + * __rec_split_crossing_bnd -- + * Save the details for the minimum split size boundary or call for a + * split. + */ +static inline int +__rec_split_crossing_bnd( + WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) +{ + WT_BOUNDARY *bnd; + WT_BTREE *btree; + WT_PAGE_HEADER *dsk; + size_t min_bnd_offset; + + WT_ASSERT(session, WT_CHECK_CROSSING_BND(r, next_len)); + + /* + * If crossing the minimum split size boundary, store the boundary + * details at the current location in the buffer. If we are crossing the + * split boundary at the same time, possible when the next record is + * large enough, just split at this point. + */ + if (WT_CROSSING_MIN_BND(r, next_len) && + !WT_CROSSING_SPLIT_BND(r, next_len)) { + btree = S2BT(session); + bnd = &r->bnd[r->bnd_next]; + dsk = r->disk_image.mem; + min_bnd_offset = (WT_PTRDIFF(r->first_free, dsk) - + bnd->offset) + WT_PAGE_HEADER_BYTE_SIZE(btree); + if (min_bnd_offset == WT_PAGE_HEADER_BYTE_SIZE(btree)) + /* + * This is possible if the first record doesn't fit in + * the minimum split size, we write this record without + * setting up any boundary here. We will get the + * opportunity to setup a boundary before writing out + * the next record. 
+ */ + return (0); + + WT_ASSERT(session, bnd->min_bnd_offset == 0); + + /* + * Hitting a page boundary resets the dictionary, in all cases. + */ + __rec_dictionary_reset(r); + + bnd->min_bnd_offset = min_bnd_offset; + bnd->min_bnd_entries = r->entries; + bnd->min_bnd_recno = r->recno; + if (dsk->type == WT_PAGE_ROW_INT || + dsk->type == WT_PAGE_ROW_LEAF) + WT_RET(__rec_split_row_promote( + session, r, &bnd->min_bnd_key, dsk->type)); + return (0); + } + + /* We are crossing a split boundary */ + return (__rec_split(session, r, next_len)); +} + /* * __rec_split_raw_worker -- * Handle the raw compression page reconciliation bookkeeping. @@ -2626,7 +2730,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, */ recno = WT_RECNO_OOB; if (dsk->type == WT_PAGE_COL_VAR) - recno = last->recno; + recno = last->max_bnd_recno; entry = max_image_slot = slots = 0; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { @@ -2853,7 +2957,7 @@ no_slots: */ dst->size = result_len + WT_BLOCK_COMPRESS_SKIP; dsk_dst = dst->mem; - dsk_dst->recno = last->recno; + dsk_dst->recno = last->max_bnd_recno; dsk_dst->mem_size = r->raw_offsets[result_slots] + WT_BLOCK_COMPRESS_SKIP; dsk_dst->u.entries = r->raw_entries[result_slots - 1]; @@ -2873,7 +2977,7 @@ no_slots: WT_RET(__wt_strndup(session, dsk, dsk_dst->mem_size, &last->disk_image)); disk_image = last->disk_image; - disk_image->recno = last->recno; + disk_image->recno = last->max_bnd_recno; disk_image->mem_size = dsk_dst->mem_size; disk_image->u.entries = dsk_dst->u.entries; } @@ -2903,14 +3007,14 @@ no_slots: */ switch (dsk->type) { case WT_PAGE_COL_INT: - next->recno = r->raw_recnos[result_slots]; + next->max_bnd_recno = r->raw_recnos[result_slots]; break; case WT_PAGE_COL_VAR: - next->recno = r->raw_recnos[result_slots - 1]; + next->max_bnd_recno = r->raw_recnos[result_slots - 1]; break; case WT_PAGE_ROW_INT: case WT_PAGE_ROW_LEAF: - next->recno = WT_RECNO_OOB; + next->max_bnd_recno = WT_RECNO_OOB; if (!last_block) { /* * Confirm there was 
uncompressed data remaining @@ -2919,7 +3023,7 @@ no_slots: */ WT_ASSERT(session, len > 0); WT_RET(__rec_split_row_promote_cell( - session, dsk, &next->key)); + session, dsk, &next->max_bnd_key)); } break; } @@ -2931,7 +3035,7 @@ no_slots: */ WT_STAT_DATA_INCR(session, compress_raw_fail); - dsk->recno = last->recno; + dsk->recno = last->max_bnd_recno; dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); dsk->u.entries = r->entries; r->disk_image.size = dsk->mem_size; @@ -3008,35 +3112,9 @@ __rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) static int __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) { - WT_BOUNDARY *bnd; + WT_BOUNDARY *bnd_cur, *bnd_prev; WT_PAGE_HEADER *dsk; - - /* Adjust the boundary information based on our split status. */ - switch (r->bnd_state) { - case SPLIT_BOUNDARY: - case SPLIT_MAX: - /* - * We never split, the reconciled page fit into a maximum page - * size. Change the first boundary slot to represent the full - * page (the first boundary slot is largely correct, just update - * the number of entries). - */ - r->bnd_next = 0; - break; - case SPLIT_TRACKING_OFF: - /* - * If we have already split, or aren't tracking boundaries, put - * the remaining data in the next boundary slot. - */ - WT_RET(__rec_split_bnd_grow(session, r)); - break; - case SPLIT_TRACKING_RAW: - /* - * We were configured for raw compression, and either we never - * wrote anything, or there's a remaindered block of data. - */ - break; - } + bool grow_bnd; /* * We may arrive here with no entries to write if the page was entirely @@ -3063,20 +3141,66 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) return (EBUSY); } - /* Set the boundary reference and increment the count. */ - bnd = &r->bnd[r->bnd_next++]; - bnd->entries = r->entries; - - /* Finalize the header information. 
*/ dsk = r->disk_image.mem; - dsk->recno = bnd->recno; - dsk->u.entries = r->entries; + + /* Set the number of entries for the just finished chunk. */ + bnd_cur = &r->bnd[r->bnd_next]; + bnd_cur->max_bnd_entries = r->entries; + + grow_bnd = true; + /* + * We can reach here even with raw_compression when the last split chunk + * is too small to be sent for raw compression. + */ + if (!r->is_bulk_load && !r->raw_compression) { + if (WT_PTRDIFF(r->first_free, dsk) > r->page_size && + r->bnd_next != 0) { + /* + * We hold two boundaries worth of data in the buffer, + * and this data doesn't fit in a single page. If the + * last chunk is too small, readjust the boundary to a + * pre-computed minimum. + * Write out the penultimate chunk to the disk as a page + */ + WT_RET(__rec_split_write_prev_and_shift_cur( + session, r, true)); + } else + if (r->bnd_next != 0) { + /* + * We have two boundaries, but the data in the + * buffer can fit a single page. Merge the + * boundaries to create a single chunk. + */ + bnd_prev = bnd_cur - 1; + bnd_prev->max_bnd_entries += + bnd_cur->max_bnd_entries; + r->bnd_next--; + grow_bnd = false; + } + } + + /* + * We already have space for an extra boundary if we merged two + * boundaries above, in that case we do not need to grow the boundary + * structure. + */ + if (grow_bnd) + WT_RET(__rec_split_bnd_grow(session, r)); + bnd_cur = &r->bnd[r->bnd_next]; + r->bnd_next++; + + /* + * Current boundary now has all the remaining data/last page now. + * Let's write it to the disk + */ + dsk->recno = bnd_cur->max_bnd_recno; + dsk->u.entries = bnd_cur->max_bnd_entries; dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); r->disk_image.size = dsk->mem_size; /* If this is a checkpoint, we're done, otherwise write the page. */ - return (__rec_is_checkpoint(session, r, bnd) ? - 0 : __rec_split_write(session, r, bnd, &r->disk_image, true)); + return (__rec_is_checkpoint(session, r, bnd_cur) ? 
+ 0 : __rec_split_write(session, r, bnd_cur, &r->disk_image, true)); } /* @@ -3109,98 +3233,6 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) return (__rec_split_finish_std(session, r)); } -/* - * __rec_split_fixup -- - * Fix up after crossing the maximum page boundary. - */ -static int -__rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r) -{ - WT_BOUNDARY *bnd; - WT_BTREE *btree; - WT_DECL_ITEM(tmp); - WT_DECL_RET; - WT_PAGE_HEADER *dsk; - size_t i, len; - uint8_t *dsk_start, *p; - - /* - * When we overflow physical limits of the page, we walk the list of - * split chunks we've created and write those pages out, then update - * the caller's information. - */ - btree = S2BT(session); - - /* - * The data isn't laid out on a page boundary or nul padded; copy it to - * a clean, aligned, padded buffer before writing it. - * - * Allocate a scratch buffer to hold the new disk image. Copy the disk - * page's header and block-manager space into the scratch buffer, most - * of the header information remains unchanged between the pages. - */ - WT_RET(__wt_scr_alloc(session, r->disk_image.memsize, &tmp)); - dsk = tmp->mem; - memcpy(dsk, r->disk_image.mem, WT_PAGE_HEADER_BYTE_SIZE(btree)); - - /* - * For each split chunk we've created, update the disk image and copy - * it into place. - */ - dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); - for (i = 0, bnd = r->bnd; i < r->bnd_next; ++i, ++bnd) { - /* Copy the page contents to the temporary buffer. */ - len = (bnd + 1)->offset - bnd->offset; - memcpy(dsk_start, - (uint8_t *)r->disk_image.mem + bnd->offset, len); - - /* Finalize the header information and write the page. 
*/ - dsk->recno = bnd->recno; - dsk->u.entries = bnd->entries; - tmp->size = WT_PAGE_HEADER_BYTE_SIZE(btree) + len; - dsk->mem_size = WT_STORE_SIZE(tmp->size); - WT_ERR(__rec_split_write(session, r, bnd, tmp, false)); - } - - /* - * There is probably a remnant in the working buffer that didn't get - * written, copy it down to the beginning of the working buffer. - * - * Confirm the remnant is no larger than a split-sized chunk, including - * header. We know that's the maximum sized remnant because we only have - * remnants if split switches from accumulating to a split boundary to - * accumulating to the end of the page (the other path here is when we - * hit a split boundary, there was room for another split chunk in the - * page, and the next item still wouldn't fit, in which case there is no - * remnant). So: we were accumulating to the end of the page and created - * a remnant. We know the remnant cannot be as large as a split-sized - * chunk, including header, because if there was room for that large a - * remnant, we wouldn't have switched from accumulating to a page end. - */ - p = (uint8_t *)r->disk_image.mem + bnd->offset; - len = WT_PTRDIFF(r->first_free, p); - if (len >= r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree)) - WT_PANIC_ERR(session, EINVAL, - "Reconciliation remnant too large for the split buffer"); - dsk = r->disk_image.mem; - dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); - (void)memmove(dsk_start, p, len); - - /* - * Fix up our caller's information, including updating the starting - * record number. - */ - r->entries -= r->total_entries; - r->first_free = dsk_start + len; - WT_ASSERT(session, - r->page_size >= (WT_PAGE_HEADER_BYTE_SIZE(btree) + len)); - r->space_avail = - r->split_size - (WT_PAGE_HEADER_BYTE_SIZE(btree) + len); - -err: __wt_scr_free(session, &tmp); - return (ret); -} - /* * __rec_split_write -- * Write a disk block out for the split helper functions. 
@@ -3238,8 +3270,6 @@ __rec_split_write(WT_SESSION_IMPL *session, F_SET(dsk, WT_PAGE_EMPTY_V_NONE); } - bnd->entries = r->entries; - /* Initialize the address (set the page type for the parent). */ switch (dsk->type) { case WT_PAGE_COL_FIX: @@ -3285,7 +3315,8 @@ __rec_split_write(WT_SESSION_IMPL *session, switch (page->type) { case WT_PAGE_COL_FIX: case WT_PAGE_COL_VAR: - if (WT_INSERT_RECNO(supd->ins) >= (bnd + 1)->recno) + if (WT_INSERT_RECNO(supd->ins) >= + (bnd + 1)->max_bnd_recno) goto supd_check_complete; break; case WT_PAGE_ROW_LEAF: @@ -3296,8 +3327,8 @@ __rec_split_write(WT_SESSION_IMPL *session, key->data = WT_INSERT_KEY(supd->ins); key->size = WT_INSERT_KEY_SIZE(supd->ins); } - WT_ERR(__wt_compare(session, - btree->collator, key, &(bnd + 1)->key, &cmp)); + WT_ERR(__wt_compare(session, btree->collator, + key, &(bnd + 1)->max_bnd_key, &cmp)); if (cmp >= 0) goto supd_check_complete; break; @@ -3387,14 +3418,14 @@ supd_check_complete: #ifdef HAVE_VERBOSE /* Output a verbose message if we create a page without many entries */ - if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && r->entries < 6) + if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && + bnd->max_bnd_entries < 6) __wt_verbose(session, WT_VERB_SPLIT, "Reconciliation creating a page with %" PRIu32 " entries, memory footprint %" WT_SIZET_FMT - ", page count %" PRIu32 ", %s, split state: %d", - r->entries, r->page->memory_footprint, r->bnd_next, - F_ISSET(r, WT_EVICTING) ? "evict" : "checkpoint", - r->bnd_state); + ", page count %" PRIu32 ", %s", bnd->max_bnd_entries, + r->page->memory_footprint, r->bnd_next, + F_ISSET(r, WT_EVICTING) ? "evict" : "checkpoint"); #endif WT_ERR(__wt_bt_write(session, buf, addr, &addr_size, @@ -3680,11 +3711,12 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) cursor->value.data, cursor->value.size, (uint64_t)0)); /* Boundary: split or write the page. 
*/ - if (key->len + val->len > r->space_avail) { - if (r->raw_compression) - WT_RET( - __rec_split_raw(session, r, key->len + val->len)); - else { + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) + WT_RET(__rec_split_raw( + session, r, key->len + val->len)); + } else + if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) { /* * Turn off prefix compression until a full key written * to the new page, and (unless already working with an @@ -3696,10 +3728,9 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_RET(__rec_cell_build_leaf_key( session, r, NULL, 0, &ovfl_key)); } - - WT_RET(__rec_split(session, r, key->len + val->len)); + WT_RET(__rec_split_crossing_bnd( + session, r, key->len + val->len)); } - } /* Copy the key/value pair onto the page. */ __rec_copy_incr(session, r, key); @@ -3740,6 +3771,10 @@ __rec_col_fix_bulk_insert_split_check(WT_CURSOR_BULK *cbulk) * split. * * Boundary: split or write the page. + * + * No need to have a minimum split size boundary, all + * pages are filled 100% except the last, allowing it to + * grow in the future. */ __rec_incr(session, r, cbulk->entry, __bitstr_size( @@ -3844,10 +3879,12 @@ __wt_bulk_insert_var( r, cbulk->last.data, cbulk->last.size, cbulk->rle)); /* Boundary: split or write the page. */ - if (val->len > r->space_avail) - WT_RET(r->raw_compression ? - __rec_split_raw(session, r, val->len) : - __rec_split(session, r, val->len)); + if (r->raw_compression) { + if (val->len > r->space_avail) + WT_RET(__rec_split_raw(session, r, val->len)); + } else + if (WT_CROSSING_SPLIT_BND(r, val->len)) + WT_RET(__rec_split_crossing_bnd(session, r, val->len)); /* Copy the value onto the page. */ if (btree->dictionary) @@ -3983,10 +4020,13 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) WT_CHILD_RELEASE_ERR(session, hazard, ref); /* Boundary: split or write the page. */ - if (val->len > r->space_avail) - WT_ERR(r->raw_compression ? 
- __rec_split_raw(session, r, val->len) : - __rec_split(session, r, val->len)); + if (r->raw_compression) { + if (val->len > r->space_avail) + WT_ERR(__rec_split_raw(session, r, val->len)); + } else + if (WT_CHECK_CROSSING_BND(r, val->len)) + WT_ERR(__rec_split_crossing_bnd( + session, r, val->len)); /* Copy the value onto the page. */ __rec_copy_incr(session, r, val); @@ -4028,10 +4068,13 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) addr->addr, addr->size, __rec_vtype(addr), r->recno); /* Boundary: split or write the page. */ - if (val->len > r->space_avail) - WT_RET(r->raw_compression ? - __rec_split_raw(session, r, val->len) : - __rec_split(session, r, val->len)); + if (r->raw_compression) { + if (val->len > r->space_avail) + WT_RET(__rec_split_raw(session, r, val->len)); + } else + if (WT_CHECK_CROSSING_BND(r, val->len)) + WT_RET(__rec_split_crossing_bnd( + session, r, val->len)); /* Copy the value onto the page. */ __rec_copy_incr(session, r, val); @@ -4139,6 +4182,10 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) * split. * * Boundary: split or write the page. + * + * No need to have a minimum split size boundary, all + * pages are filled 100% except the last, allowing it to + * grow in the future. */ __rec_incr(session, r, entry, __bitstr_size((size_t)entry * btree->bitcnt)); @@ -4295,10 +4342,13 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, session, r, value->data, value->size, rle)); /* Boundary: split or write the page. */ - if (val->len > r->space_avail) - WT_RET(r->raw_compression ? - __rec_split_raw(session, r, val->len) : - __rec_split(session, r, val->len)); + if (r->raw_compression) { + if (val->len > r->space_avail) + WT_RET(__rec_split_raw(session, r, val->len)); + } else + if (WT_CHECK_CROSSING_BND(r, val->len)) + WT_RET(__rec_split_crossing_bnd( + session, r, val->len)); /* Copy the value onto the page. 
*/ if (!deleted && !overflow_type && btree->dictionary) @@ -4961,11 +5011,12 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) r->cell_zero = false; /* Boundary: split or write the page. */ - if (key->len + val->len > r->space_avail) { - if (r->raw_compression) + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) WT_ERR(__rec_split_raw( session, r, key->len + val->len)); - else { + } else + if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) { /* * In one path above, we copied address blocks * from the page rather than building the actual @@ -4977,10 +5028,10 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_IKEY_DATA(ikey), ikey->size)); key_onpage_ovfl = false; } - WT_ERR(__rec_split( + + WT_ERR(__rec_split_crossing_bnd( session, r, key->len + val->len)); } - } /* Copy the key and value onto the page. */ __rec_copy_incr(session, r, key); @@ -5030,10 +5081,14 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) addr->addr, addr->size, __rec_vtype(addr), WT_RECNO_OOB); /* Boundary: split or write the page. */ - if (key->len + val->len > r->space_avail) - WT_RET(r->raw_compression ? - __rec_split_raw(session, r, key->len + val->len) : - __rec_split(session, r, key->len + val->len)); + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) + WT_RET(__rec_split_raw( + session, r, key->len + val->len)); + } else + if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) + WT_RET(__rec_split_crossing_bnd( + session, r, key->len + val->len)); /* Copy the key and value onto the page. */ __rec_copy_incr(session, r, key); @@ -5362,16 +5417,17 @@ build: } /* Boundary: split or write the page. 
*/ - if (key->len + val->len > r->space_avail) { - if (r->raw_compression) + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) WT_ERR(__rec_split_raw( session, r, key->len + val->len)); - else { + } else + if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) { /* - * In one path above, we copied address blocks - * from the page rather than building the actual - * key. In that case, we have to build the key - * now because we are about to promote it. + * If we copied address blocks from the page + * rather than building the actual key, we have + * to build the key now because we are about to + * promote it. */ if (key_onpage_ovfl) { WT_ERR(__wt_dsk_cell_data_ref(session, @@ -5390,14 +5446,13 @@ build: if (!ovfl_key) WT_ERR( __rec_cell_build_leaf_key( - session, - r, NULL, 0, &ovfl_key)); + session, r, NULL, 0, + &ovfl_key)); } - WT_ERR(__rec_split( + WT_ERR(__rec_split_crossing_bnd( session, r, key->len + val->len)); } - } /* Copy the key/value pair onto the page. */ __rec_copy_incr(session, r, key); @@ -5460,11 +5515,12 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins), &ovfl_key)); /* Boundary: split or write the page. */ - if (key->len + val->len > r->space_avail) { - if (r->raw_compression) + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) WT_RET(__rec_split_raw( session, r, key->len + val->len)); - else { + } else + if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) { /* * Turn off prefix compression until a full key * written to the new page, and (unless already @@ -5476,14 +5532,13 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) if (!ovfl_key) WT_RET( __rec_cell_build_leaf_key( - session, - r, NULL, 0, &ovfl_key)); + session, r, NULL, 0, + &ovfl_key)); } - WT_RET(__rec_split( + WT_RET(__rec_split_crossing_bnd( session, r, key->len + val->len)); } - } /* Copy the key/value pair onto the page. 
*/ __rec_copy_incr(session, r, key); @@ -5595,13 +5650,14 @@ __rec_split_dump_keys(WT_SESSION_IMPL *session, WT_PAGE *page, WT_RECONCILE *r) __wt_verbose(session, WT_VERB_SPLIT, "starting key %s", __wt_buf_set_printable( - session, bnd->key.data, bnd->key.size, tkey)); + session, bnd->max_bnd_key.data, + bnd->max_bnd_key.size, tkey)); break; case WT_PAGE_COL_FIX: case WT_PAGE_COL_INT: case WT_PAGE_COL_VAR: __wt_verbose(session, WT_VERB_SPLIT, - "starting recno %" PRIu64, bnd->recno); + "starting recno %" PRIu64, bnd->max_bnd_recno); break; WT_ILLEGAL_VALUE_ERR(session); } @@ -5863,10 +5919,10 @@ __rec_split_row(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) /* We never set the first page's key, grab it from the original page. */ ref = r->ref; if (__wt_ref_is_root(ref)) - WT_RET(__wt_buf_set(session, &r->bnd[0].key, "", 1)); + WT_RET(__wt_buf_set(session, &r->bnd[0].max_bnd_key, "", 1)); else { __wt_ref_key(ref->home, ref, &p, &size); - WT_RET(__wt_buf_set(session, &r->bnd[0].key, p, size)); + WT_RET(__wt_buf_set(session, &r->bnd[0].max_bnd_key, p, size)); } /* Allocate, then initialize the array of replacement blocks. */ @@ -5874,8 +5930,8 @@ __rec_split_row(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) for (multi = mod->mod_multi, bnd = r->bnd, i = 0; i < r->bnd_next; ++multi, ++bnd, ++i) { - WT_RET(__wt_row_ikey_alloc(session, 0, - bnd->key.data, bnd->key.size, &multi->key.ikey)); + WT_RET(__wt_row_ikey_alloc(session, 0, bnd->max_bnd_key.data, + bnd->max_bnd_key.size, &multi->key.ikey)); /* * Copy any disk image. Don't take saved updates without a @@ -5922,7 +5978,7 @@ __rec_split_col(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) for (multi = mod->mod_multi, bnd = r->bnd, i = 0; i < r->bnd_next; ++multi, ++bnd, ++i) { - multi->key.recno = bnd->recno; + multi->key.recno = bnd->max_bnd_recno; /* * Copy any disk image. 
Don't take saved updates without a diff --git a/test/format/config.h b/test/format/config.h index e3e1e73a786..b5feb7a5321 100644 --- a/test/format/config.h +++ b/test/format/config.h @@ -284,7 +284,7 @@ static CONFIG c[] = { { "split_pct", "page split size as a percentage of the maximum page size", - 0x0, 40, 85, 85, &g.c_split_pct, NULL }, + 0x0, 50, 100, 100, &g.c_split_pct, NULL }, { "statistics", "maintain statistics", /* 20% */ -- cgit v1.2.1 From ebff498af45a3e64fab05cd6360b117c010634b9 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 29 Mar 2017 15:55:04 +1100 Subject: Revert "WT-2439 Improve page layout: keep pages more than half full (#3277)" This reverts commit 1c41c7735b3529521b7bd34180f80584caee7f59. --- dist/api_data.py | 4 +- src/btree/bt_handle.c | 9 +- src/config/config_def.c | 16 +- src/include/btree.h | 6 - src/include/wiredtiger.in | 4 +- src/reconcile/rec_write.c | 954 ++++++++++++++++++++++------------------------ test/format/config.h | 2 +- 7 files changed, 463 insertions(+), 532 deletions(-) diff --git a/dist/api_data.py b/dist/api_data.py index 22600dd5e29..1d669fa7fe0 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -295,12 +295,12 @@ file_config = format_meta + file_runtime_config + [ Config('split_deepen_per_child', '0', r''' entries allocated per child when deepening the tree''', type='int', undoc=True), - Config('split_pct', '90', r''' + Config('split_pct', '75', r''' the Btree page split size as a percentage of the maximum Btree page size, that is, when a Btree page is split, it will be split into smaller pages, where each page is the specified percentage of the maximum Btree page size''', - min='50', max='100'), + min='25', max='100'), ] # File metadata, including both configurable and non-configurable (internal) diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 98c246fb897..f2bffee06da 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -780,16 +780,9 @@ __btree_page_sizes(WT_SESSION_IMPL 
*session) * Get the split percentage (reconciliation splits pages into smaller * than the maximum page size chunks so we don't split every time a * new entry is added). Determine how large newly split pages will be. - * Set to the minimum, if the read value is less than that. */ WT_RET(__wt_config_gets(session, cfg, "split_pct", &cval)); - if (cval.val < WT_BTREE_MIN_SPLIT_PCT) { - btree->split_pct = WT_BTREE_MIN_SPLIT_PCT; - WT_RET(__wt_msg(session, - "Re-setting split_pct for %s to the minimum allowed of " - "%d%%.", session->dhandle->name, WT_BTREE_MIN_SPLIT_PCT)); - } else - btree->split_pct = (int)cval.val; + btree->split_pct = (int)cval.val; intl_split_size = __wt_split_page_size(btree, btree->maxintlpage); leaf_split_size = __wt_split_page_size(btree, btree->maxleafpage); diff --git a/src/config/config_def.c b/src/config/config_def.c index f152fbacad4..b11a8d63fdb 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -294,7 +294,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = { { "source", "string", NULL, NULL, NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, { "type", "string", NULL, NULL, NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, @@ -466,7 +466,7 @@ static const WT_CONFIG_CHECK confchk_file_config[] = { { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, NULL, 0 }, @@ -530,7 +530,7 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = { { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, { 
"split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, NULL, 0 }, @@ -614,7 +614,7 @@ static const WT_CONFIG_CHECK confchk_lsm_meta[] = { { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, NULL, 0 }, @@ -1119,7 +1119,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "chunk_size=10MB,merge_max=15,merge_min=0),memory_page_max=5MB," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,source=,split_deepen_min_child=0," - "split_deepen_per_child=0,split_pct=90,type=file,value_format=u", + "split_deepen_per_child=0,split_pct=75,type=file,value_format=u", confchk_WT_SESSION_create, 42 }, { "WT_SESSION.drop", @@ -1213,7 +1213,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "leaf_value_max=0,log=(enabled=true),memory_page_max=5MB," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,split_deepen_min_child=0," - "split_deepen_per_child=0,split_pct=90,value_format=u", + "split_deepen_per_child=0,split_pct=75,value_format=u", confchk_file_config, 35 }, { "file.meta", @@ -1228,7 +1228,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=0," "log=(enabled=true),memory_page_max=5MB,os_cache_dirty_max=0," "os_cache_max=0,prefix_compression=false,prefix_compression_min=4" - ",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90," + ",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75," 
"value_format=u,version=(major=0,minor=0)", confchk_file_meta, 39 }, @@ -1253,7 +1253,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "merge_min=0),memory_page_max=5MB,old_chunks=," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,split_deepen_min_child=0," - "split_deepen_per_child=0,split_pct=90,value_format=u", + "split_deepen_per_child=0,split_pct=75,value_format=u", confchk_lsm_meta, 39 }, { "table.meta", diff --git a/src/include/btree.h b/src/include/btree.h index 28fe1b94b23..88312f408cc 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -57,12 +57,6 @@ /* Evict pages if we see this many consecutive deleted records. */ #define WT_BTREE_DELETE_THRESHOLD 1000 -/* - * Minimum size of the chunks (in percentage of the page size) a page gets split - * into during reconciliation. - */ -#define WT_BTREE_MIN_SPLIT_PCT 50 - /* * WT_BTREE -- * A btree handle. diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 707159ef6ae..558e93d3de0 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1242,8 +1242,8 @@ struct __wt_session { * @config{split_pct, the Btree page split size as a percentage of the * maximum Btree page size\, that is\, when a Btree page is split\, it * will be split into smaller pages\, where each page is the specified - * percentage of the maximum Btree page size., an integer between 50 and - * 100; default \c 90.} + * percentage of the maximum Btree page size., an integer between 25 and + * 100; default \c 75.} * @config{type, set the type of data source used to store a column * group\, index or simple table. By default\, a \c "file:" URI is * derived from the object name. 
The \c type configuration can be used diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index e18d44f96ff..23f654caa70 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -26,11 +26,6 @@ typedef struct { uint32_t flags; /* Caller's configuration */ WT_ITEM disk_image; /* Temporary disk-image buffer */ - /* - * Temporary buffer used to write out a disk image when managing two - * chunks worth of data in memory - */ - WT_ITEM *interim_buf; /* * Track start/stop write generation to decide if all changes to the @@ -132,7 +127,6 @@ typedef struct { * repeatedly split a packed page. */ uint32_t split_size; /* Split page size */ - uint32_t min_split_size; /* Minimum split page size */ /* * The problem with splits is we've done a lot of work by the time we @@ -157,6 +151,16 @@ typedef struct { */ size_t offset; /* Split's first byte */ + /* + * The recno and entries fields are the starting record number + * of the split chunk (for column-store splits), and the number + * of entries in the split chunk. These fields are used both + * to write the split chunk, and to create a new internal page + * to reference the split pages. + */ + uint64_t recno; /* Split's starting record */ + uint32_t entries; /* Split's entries */ + WT_ADDR addr; /* Split's written location */ uint32_t size; /* Split's size */ uint32_t checksum; /* Split's checksum */ @@ -178,42 +182,39 @@ typedef struct { size_t supd_allocated; /* - * While reconciling pages, at any given time, we maintain two - * split chunks in the memory to be written out as pages. As we - * get to the last two chunks, if the last one turns out to be - * smaller than the minimum split size, we go back into the - * penultimate chunk and split at this minimum split size - * boundary. This moves some data from the penultimate chunk to - * the last chunk, hence increasing the size of the last page - * written without decreasing the penultimate page size beyond - * the minimum split size. 
For this reason, we maintain both a - * maximum split percentage boundary and a minimum split - * percentage boundary. - * - * The recno and entries fields are the starting record number - * of the split chunk (for column-store splits), and the number - * of entries in the split chunk. These fields are used both to - * write the split chunk, and to create a new internal page to - * reference the split pages. - * * The key for a row-store page; no column-store key is needed * because the page's recno, stored in the recno field, is the * column-store key. */ - uint32_t max_bnd_entries; - uint64_t max_bnd_recno; - WT_ITEM max_bnd_key; - - size_t min_bnd_offset; - uint32_t min_bnd_entries; - uint64_t min_bnd_recno; - WT_ITEM min_bnd_key; + WT_ITEM key; /* Promoted row-store key */ } *bnd; /* Saved boundaries */ uint32_t bnd_next; /* Next boundary slot */ uint32_t bnd_next_max; /* Maximum boundary slots used */ size_t bnd_entries; /* Total boundary slots */ size_t bnd_allocated; /* Bytes allocated */ + /* + * We track the total number of page entries copied into split chunks + * so we can easily figure out how many entries in the current split + * chunk. + */ + uint32_t total_entries; /* Total entries in splits */ + + /* + * And there's state information as to where in this process we are: + * (1) tracking split boundaries because we can still fit more split + * chunks into the maximum page size, (2) tracking the maximum page + * size boundary because we can't fit any more split chunks into the + * maximum page size, (3) not performing boundary checks because it's + * either not useful with the current page size configuration, or + * because we've already been forced to split. 
+ */ + enum { SPLIT_BOUNDARY=0, /* Next: a split page boundary */ + SPLIT_MAX=1, /* Next: the maximum page boundary */ + SPLIT_TRACKING_OFF=2, /* No boundary checks */ + SPLIT_TRACKING_RAW=3 } /* Underlying compression decides */ + bnd_state; + /* * We track current information about the current record number, the * number of entries copied into the temporary buffer, where we are @@ -292,14 +293,6 @@ typedef struct { uint32_t tested_ref_state; /* Debugging information */ } WT_RECONCILE; -#define WT_CROSSING_MIN_BND(r, next_len) \ - ((r)->bnd[(r)->bnd_next].min_bnd_offset == 0 && \ - ((r)->space_avail - (next_len)) < \ - ((r)->split_size - (r)->min_split_size)) -#define WT_CROSSING_SPLIT_BND(r, next_len) ((next_len) > (r)->space_avail) -#define WT_CHECK_CROSSING_BND(r, next_len) \ - (WT_CROSSING_MIN_BND(r, next_len) || WT_CROSSING_SPLIT_BND(r, next_len)) - static void __rec_bnd_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *, bool); static void __rec_cell_build_addr(WT_SESSION_IMPL *, WT_RECONCILE *, const void *, size_t, u_int, uint64_t); @@ -321,7 +314,6 @@ static int __rec_col_var(WT_SESSION_IMPL *, static int __rec_col_var_helper(WT_SESSION_IMPL *, WT_RECONCILE *, WT_SALVAGE_COOKIE *, WT_ITEM *, bool, uint8_t, uint64_t); static int __rec_destroy_session(WT_SESSION_IMPL *); -static uint32_t __rec_min_split_page_size(WT_BTREE *, uint32_t); static int __rec_root_write(WT_SESSION_IMPL *, WT_PAGE *, uint32_t); static int __rec_row_int(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_row_leaf(WT_SESSION_IMPL *, @@ -331,6 +323,7 @@ static int __rec_row_leaf_insert( static int __rec_row_merge(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_split_col(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_split_discard(WT_SESSION_IMPL *, WT_PAGE *); +static int __rec_split_fixup(WT_SESSION_IMPL *, WT_RECONCILE *); static int __rec_split_row(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_split_row_promote( WT_SESSION_IMPL 
*, WT_RECONCILE *, WT_ITEM *, uint8_t); @@ -975,7 +968,6 @@ __rec_destroy(WT_SESSION_IMPL *session, void *reconcilep) *(WT_RECONCILE **)reconcilep = NULL; __wt_buf_free(session, &r->disk_image); - __wt_scr_free(session, &r->interim_buf); __wt_free(session, r->raw_entries); __wt_free(session, r->raw_offsets); @@ -1040,8 +1032,7 @@ __rec_bnd_cleanup(WT_SESSION_IMPL *session, WT_RECONCILE *r, bool destroy) __wt_free(session, bnd->addr.addr); __wt_free(session, bnd->disk_image); __wt_free(session, bnd->supd); - __wt_buf_free(session, &bnd->max_bnd_key); - __wt_buf_free(session, &bnd->min_bnd_key); + __wt_buf_free(session, &bnd->key); } __wt_free(session, r->bnd); r->bnd_next = 0; @@ -1936,8 +1927,8 @@ static void __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd) { bnd->offset = 0; - bnd->max_bnd_recno = WT_RECNO_OOB; - bnd->max_bnd_entries = 0; + bnd->recno = WT_RECNO_OOB; + bnd->entries = 0; __wt_free(session, bnd->addr.addr); WT_CLEAR(bnd->addr); @@ -1952,10 +1943,6 @@ __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd) bnd->already_compressed = false; - bnd->min_bnd_offset = 0; - bnd->min_bnd_entries = 0; - bnd->min_bnd_recno = WT_RECNO_OOB; - /* * Don't touch the key, we re-use that memory in each new * reconciliation. @@ -1987,63 +1974,39 @@ __rec_split_bnd_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r) } /* - * __rec_split_page_size_from_pct -- - * Given a split percentage, calculate split page size in bytes. + * __wt_split_page_size -- + * Split page size calculation: we don't want to repeatedly split every + * time a new entry is added, so we split to a smaller-than-maximum page size. 
*/ -static uint32_t -__rec_split_page_size_from_pct( - int split_pct, uint32_t maxpagesize, uint32_t allocsize) { +uint32_t +__wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) +{ uintmax_t a; uint32_t split_size; /* * Ideally, the split page size is some percentage of the maximum page - * size rounded to an allocation unit (round to an allocation unit so we - * don't waste space when we write). + * size rounded to an allocation unit (round to an allocation unit so + * we don't waste space when we write). */ a = maxpagesize; /* Don't overflow. */ split_size = (uint32_t)WT_ALIGN_NEAREST( - (a * (u_int)split_pct) / 100, allocsize); + (a * (u_int)btree->split_pct) / 100, btree->allocsize); /* - * Respect the configured split percentage if the calculated split size - * is either zero or a full page. The user has either configured an - * allocation size that matches the page size, or a split percentage - * that is close to zero or one hundred. Rounding is going to provide a - * worse outcome than having a split point that doesn't fall on an - * allocation size boundary in those cases. + * Respect the configured split percentage if the calculated split + * size is either zero or a full page. The user has either configured + * an allocation size that matches the page size, or a split + * percentage that is close to zero or one hundred. Rounding is going + * to provide a worse outcome than having a split point that doesn't + * fall on an allocation size boundary in those cases. */ if (split_size == 0 || split_size == maxpagesize) - split_size = (uint32_t)((a * (u_int)split_pct) / 100); + split_size = (uint32_t)((a * (u_int)btree->split_pct) / 100); return (split_size); } -/* - * __wt_split_page_size -- - * Split page size calculation: we don't want to repeatedly split every - * time a new entry is added, so we split to a smaller-than-maximum page size. 
- */ -uint32_t -__wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) -{ - return (__rec_split_page_size_from_pct( - btree->split_pct, maxpagesize, btree->allocsize)); -} - -/* - * __rec_min_split_page_size -- - * Minimum split size boundary calculation: To track a boundary at the - * minimum split size that we could have split at instead of splitting at - * the split page size. - */ -static uint32_t -__rec_min_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) -{ - return (__rec_split_page_size_from_pct( - WT_BTREE_MIN_SPLIT_PCT, maxpagesize, btree->allocsize)); -} - /* * __rec_split_init -- * Initialization for the reconciliation split functions. @@ -2055,7 +2018,7 @@ __rec_split_init(WT_SESSION_IMPL *session, WT_BM *bm; WT_BTREE *btree; WT_PAGE_HEADER *dsk; - size_t corrected_page_size, disk_img_buf_size; + size_t corrected_page_size; btree = S2BT(session); bm = btree->bm; @@ -2090,6 +2053,33 @@ __rec_split_init(WT_SESSION_IMPL *session, r->max_raw_page_size = r->page_size = (uint32_t)WT_MIN(r->page_size * 10, WT_MAX(r->page_size, btree->maxmempage / 2)); + + /* + * Ensure the disk image buffer is large enough for the max object, as + * corrected by the underlying block manager. + */ + corrected_page_size = r->page_size; + WT_RET(bm->write_size(bm, session, &corrected_page_size)); + WT_RET(__wt_buf_init(session, &r->disk_image, corrected_page_size)); + + /* + * Clear the disk page header to ensure all of it is initialized, even + * the unused fields. + * + * In the case of fixed-length column-store, clear the entire buffer: + * fixed-length column-store sets bits in bytes, where the bytes are + * assumed to initially be 0. + */ + memset(r->disk_image.mem, 0, page->type == WT_PAGE_COL_FIX ? + corrected_page_size : WT_PAGE_HEADER_SIZE); + + /* + * Set the page type (the type doesn't change, and setting it later + * would require additional code in a few different places). 
+ */ + dsk = r->disk_image.mem; + dsk->type = page->type; + /* * If we have to split, we want to choose a smaller page size for the * split pages, because otherwise we could end up splitting one large @@ -2109,28 +2099,22 @@ __rec_split_init(WT_SESSION_IMPL *session, * creating overflow items and compacted data, for example, as those * items have already been written to disk). So, the loop calls the * helper functions when approaching a split boundary, and we save the - * information at that point. We also save the boundary information at - * the minimum split size. We maintain two chunks (each boundary - * represents a chunk that gets written as a page) in the memory, - * writing out the older one to the disk as a page when we need to make - * space for a new chunk. On reaching the last chunk, if it turns out to - * be smaller than the minimum split size, we go back into the - * penultimate chunk and split at this minimum split size boundary. This - * moves some data from the penultimate chunk to the last chunk, hence - * increasing the size of the last page written without decreasing the - * penultimate page size beyond the minimum split size. + * information at that point. That allows us to go back and split the + * page at the boundary points if we eventually overflow the maximum + * page size. * * Finally, all this doesn't matter for fixed-size column-store pages, * raw compression, and salvage. Fixed-size column store pages can * split under (very) rare circumstances, but they're allocated at a * fixed page size, never anything smaller. In raw compression, the - * underlying compression routine decides when we split, so it's not our - * problem. In salvage, as noted above, we can't split at all. + * underlying compression routine decides when we split, so it's not + * our problem. In salvage, as noted above, we can't split at all. 
*/ if (r->raw_compression || r->salvage != NULL) { r->split_size = 0; r->space_avail = r->page_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - } else if (page->type == WT_PAGE_COL_FIX) { + } + else if (page->type == WT_PAGE_COL_FIX) { r->split_size = r->page_size; r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); @@ -2138,53 +2122,32 @@ __rec_split_init(WT_SESSION_IMPL *session, r->split_size = __wt_split_page_size(btree, r->page_size); r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - r->min_split_size = - __rec_min_split_page_size(btree, r->page_size); } - - /* - * Ensure the disk image buffer is large enough for the max object, as - * corrected by the underlying block manager. - * - * The buffer that we build disk image in, needs to hold two chunks - * worth of data. Since we want to support split_size more than the page - * size (to allow for adjustments based on the compression), this buffer - * should be greater of twice of split_size and page_size. - */ - corrected_page_size = r->page_size; - disk_img_buf_size = 2 * WT_MAX(corrected_page_size, r->split_size); - WT_RET(bm->write_size(bm, session, &corrected_page_size)); - WT_RET(__wt_buf_init(session, &r->disk_image, disk_img_buf_size)); - - /* - * Clear the disk page header to ensure all of it is initialized, even - * the unused fields. - * - * In the case of fixed-length column-store, clear the entire buffer: - * fixed-length column-store sets bits in bytes, where the bytes are - * assumed to initially be 0. - */ - memset(r->disk_image.mem, 0, page->type == WT_PAGE_COL_FIX ? - disk_img_buf_size : WT_PAGE_HEADER_SIZE); - - /* - * Set the page type (the type doesn't change, and setting it later - * would require additional code in a few different places). - */ - dsk = r->disk_image.mem; - dsk->type = page->type; - r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); /* Initialize the first boundary. 
*/ r->bnd_next = 0; WT_RET(__rec_split_bnd_grow(session, r)); __rec_split_bnd_init(session, &r->bnd[0]); - r->bnd[0].max_bnd_recno = recno; + r->bnd[0].recno = recno; r->bnd[0].offset = WT_PAGE_HEADER_BYTE_SIZE(btree); - /* Initialize the entry counter. */ - r->entries = 0; + /* + * If the maximum page size is the same as the split page size, either + * because of the object type or application configuration, there isn't + * any need to maintain split boundaries within a larger page. + * + * No configuration for salvage here, because salvage can't split. + */ + if (r->raw_compression) + r->bnd_state = SPLIT_TRACKING_RAW; + else if (max == r->split_size) + r->bnd_state = SPLIT_TRACKING_OFF; + else + r->bnd_state = SPLIT_BOUNDARY; + + /* Initialize the entry counters. */ + r->entries = r->total_entries = 0; /* Initialize the starting record number. */ r->recno = recno; @@ -2387,112 +2350,19 @@ __rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len) { WT_BM *bm; WT_BTREE *btree; - size_t corrected_page_size, inuse, len; + size_t corrected_page_size, len; btree = S2BT(session); bm = btree->bm; len = WT_PTRDIFF(r->first_free, r->disk_image.mem); - inuse = (len - r->bnd[r->bnd_next].offset) + - WT_PAGE_HEADER_BYTE_SIZE(btree); - corrected_page_size = inuse + add_len; - + corrected_page_size = len + add_len; WT_RET(bm->write_size(bm, session, &corrected_page_size)); - /* Need to account for buffer carrying two chunks worth of data */ - WT_RET(__wt_buf_grow(session, &r->disk_image, 2 * corrected_page_size)); - + WT_RET(__wt_buf_grow(session, &r->disk_image, corrected_page_size)); r->first_free = (uint8_t *)r->disk_image.mem + len; - WT_ASSERT(session, corrected_page_size >= inuse); - r->space_avail = corrected_page_size - inuse; + WT_ASSERT(session, corrected_page_size >= len); + r->space_avail = corrected_page_size - len; WT_ASSERT(session, r->space_avail >= add_len); - - return (0); -} - -/* - * __rec_split_write_prev_and_shift_cur -- - * Write the 
previous split chunk to the disk as a page. Shift the contents - * of the current chunk to the start of the buffer, making space for a new - * chunk to be written. - * If the caller asks for a chunk resizing, the boundary between the two - * chunks is readjusted to the minimum split size boundary details stored - * in the previous chunk, letting the current chunk grow at the cost of the - * previous chunk. - */ -static int -__rec_split_write_prev_and_shift_cur( - WT_SESSION_IMPL *session, WT_RECONCILE *r, bool resize_chunks) -{ - WT_BM *bm; - WT_BOUNDARY *bnd_cur, *bnd_prev; - WT_BTREE *btree; - WT_PAGE_HEADER *dsk, *dsk_tmp; - size_t cur_len, len; - uint8_t *dsk_start; - - WT_ASSERT(session, r->bnd_next != 0); - - btree = S2BT(session); - bm = btree->bm; - bnd_cur = &r->bnd[r->bnd_next]; - bnd_prev = bnd_cur - 1; - dsk = r->disk_image.mem; - cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset; - - /* - * Resize chunks if the current is smaller than the minimum, and there - * are details on the minimum split size boundary available in the - * previous boundary details. - * - * There is a possibility that we do not have a minimum boundary set, in - * such a case we skip chunk resizing. Such a condition is possible for - * instance when we are building the image in the buffer and the first - * K/V pair is large enough that it surpasses both the minimum split - * size and the split size the application has set. In such a case we - * split the chunk without saving any minimum boundary. 
- */ - if (resize_chunks && - cur_len < r->min_split_size && bnd_prev->min_bnd_offset != 0) { - bnd_cur->offset = bnd_prev->min_bnd_offset; - bnd_cur->max_bnd_entries += - bnd_prev->max_bnd_entries - bnd_prev->min_bnd_entries; - bnd_prev->max_bnd_entries = bnd_prev->min_bnd_entries; - bnd_cur->max_bnd_recno = bnd_prev->min_bnd_recno; - - WT_RET(__wt_buf_set(session, &bnd_cur->max_bnd_key, - bnd_prev->min_bnd_key.data, bnd_prev->min_bnd_key.size)); - - /* Update current chunk's length */ - cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset; - } - - /* - * Create an interim buffer if not already done to prepare the previous - * chunk's disk image. - */ - len = bnd_cur->offset; - WT_RET(bm->write_size(bm, session, &len)); - if (r->interim_buf == NULL) - WT_RET(__wt_scr_alloc(session, len, &r->interim_buf)); - else - WT_RET(__wt_buf_init(session, r->interim_buf, len)); - - dsk_tmp = r->interim_buf->mem; - memcpy(dsk_tmp, dsk, bnd_cur->offset); - dsk_tmp->recno = bnd_prev->max_bnd_recno; - dsk_tmp->u.entries = bnd_prev->max_bnd_entries; - dsk_tmp->mem_size = WT_STORE_SIZE(bnd_cur->offset); - r->interim_buf->size = dsk_tmp->mem_size; - WT_RET(__rec_split_write(session, r, bnd_prev, r->interim_buf, false)); - - /* Shift the current chunk to the start of the buffer */ - dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); - (void)memmove(dsk_start, (uint8_t *)dsk + bnd_cur->offset, cur_len); - - /* Fix boundary offset */ - bnd_cur->offset = WT_PAGE_HEADER_BYTE_SIZE(btree); - /* Fix where free points */ - r->first_free = dsk_start + cur_len; return (0); } @@ -2512,9 +2382,6 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) btree = S2BT(session); dsk = r->disk_image.mem; - /* Fixed length col store can call with next_len 0 */ - WT_ASSERT(session, next_len == 0 || r->space_avail < next_len); - /* * We should never split during salvage, and we're about to drop core * because there's no parent page. 
@@ -2524,58 +2391,147 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) "%s page too large, attempted split during salvage", __wt_page_type_string(r->page->type)); - last = &r->bnd[r->bnd_next]; - inuse = (WT_PTRDIFF(r->first_free, dsk) - last->offset) + - WT_PAGE_HEADER_BYTE_SIZE(btree); - - /* - * We can get here if the first key/value pair won't fit. - * Additionally, grow the buffer to contain the current item if we - * haven't already consumed a reasonable portion of a split chunk. - */ - if (inuse < r->split_size / 2) - goto done; - /* Hitting a page boundary resets the dictionary, in all cases. */ __rec_dictionary_reset(r); - /* Set the number of entries for the just finished chunk. */ - last->max_bnd_entries = r->entries; + inuse = WT_PTRDIFF(r->first_free, dsk); + switch (r->bnd_state) { + case SPLIT_BOUNDARY: + /* + * We can get here if the first key/value pair won't fit. + * Additionally, grow the buffer to contain the current item if + * we haven't already consumed a reasonable portion of a split + * chunk. + */ + if (inuse < r->split_size / 2) + break; - /* - * In case of bulk load, write out chunks as we get them. - * In other cases, we keep two chunks in memory at a given time. So, if - * there is a previous chunk, write it out, making space in the buffer - * for the next chunk to be written. - */ - if (r->is_bulk_load) { - dsk->recno = last->max_bnd_recno; - dsk->u.entries = last->max_bnd_entries; - dsk->mem_size = (uint32_t)inuse; + /* + * About to cross a split boundary but not yet forced to split + * into multiple pages. If we have to split, this is one of the + * split points, save information about where we are when the + * split would have happened. + */ + WT_RET(__rec_split_bnd_grow(session, r)); + last = &r->bnd[r->bnd_next++]; + next = last + 1; + + /* Set the number of entries for the just finished chunk. 
*/ + last->entries = r->entries - r->total_entries; + r->total_entries = r->entries; + + /* Set the key for the next chunk. */ + next->recno = r->recno; + if (dsk->type == WT_PAGE_ROW_INT || + dsk->type == WT_PAGE_ROW_LEAF) + WT_RET(__rec_split_row_promote( + session, r, &next->key, dsk->type)); + + /* + * Set the starting buffer offset and clear the entries (the + * latter not required, but cleaner). + */ + next->offset = WT_PTRDIFF(r->first_free, dsk); + next->entries = 0; + + /* Set the space available to another split-size chunk. */ + r->space_avail = + r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); + + /* + * Adjust the space available to handle two cases: + * - We don't have enough room for another full split-size + * chunk on the page. + * - We chose to fill past a page boundary because of a + * large item. + */ + if (inuse + r->space_avail > r->page_size) { + r->space_avail = + r->page_size > inuse ? (r->page_size - inuse) : 0; + + /* There are no further boundary points. */ + r->bnd_state = SPLIT_MAX; + } + + /* + * Return if the next object fits into this page, else we have + * to split the page. + */ + if (r->space_avail >= next_len) + return (0); + + /* FALLTHROUGH */ + case SPLIT_MAX: + /* + * We're going to have to split and create multiple pages. + * + * Cycle through the saved split-point information, writing the + * split chunks we have tracked. The underlying fixup function + * sets the space available and other information, and copied + * any unwritten chunk of data to the beginning of the buffer. + */ + WT_RET(__rec_split_fixup(session, r)); + + /* We're done saving split chunks. */ + r->bnd_state = SPLIT_TRACKING_OFF; + break; + case SPLIT_TRACKING_OFF: + /* + * We can get here if the first key/value pair won't fit. + * Additionally, grow the buffer to contain the current item if + * we haven't already consumed a reasonable portion of a split + * chunk. 
+ */ + if (inuse < r->split_size / 2) + break; + + /* + * The key/value pairs didn't fit into a single page, but either + * we've already noticed that and are now processing the rest of + * the pairs at split size boundaries, or the split size was the + * same as the page size, and we never bothered with split point + * information at all. + */ + WT_RET(__rec_split_bnd_grow(session, r)); + last = &r->bnd[r->bnd_next++]; + next = last + 1; + + /* + * Set the key for the next chunk (before writing the block, a + * key range is needed in that code). + */ + next->recno = r->recno; + if (dsk->type == WT_PAGE_ROW_INT || + dsk->type == WT_PAGE_ROW_LEAF) + WT_RET(__rec_split_row_promote( + session, r, &next->key, dsk->type)); + + /* Clear the entries (not required, but cleaner). */ + next->entries = 0; + + /* Finalize the header information and write the page. */ + dsk->recno = last->recno; + dsk->u.entries = r->entries; + dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); r->disk_image.size = dsk->mem_size; - WT_RET(__rec_split_write( - session, r, last, &r->disk_image, false)); - /* Fix where free points */ + WT_RET( + __rec_split_write(session, r, last, &r->disk_image, false)); + + /* + * Set the caller's entry count and buffer information for the + * next chunk. We only get here if we're not splitting or have + * already split, so it's split-size chunks from here on out. + */ + r->entries = 0; r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); - } else if (r->bnd_next != 0) - WT_RET(__rec_split_write_prev_and_shift_cur(session, r, false)); + r->space_avail = + r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); + break; + case SPLIT_TRACKING_RAW: + return (__wt_illegal_value(session, NULL)); + } - /* Prepare the next boundary */ - WT_RET(__rec_split_bnd_grow(session, r)); - r->bnd_next++; - next = &r->bnd[r->bnd_next]; - next->offset = WT_PTRDIFF(r->first_free, dsk); - /* Set the key for the next chunk. 
*/ - next->max_bnd_recno = r->recno; - if (dsk->type == WT_PAGE_ROW_INT || dsk->type == WT_PAGE_ROW_LEAF) - WT_RET(__rec_split_row_promote( - session, r, &next->max_bnd_key, dsk->type)); - - r->entries = 0; - /* Set the space available to another split-size chunk. */ - r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - -done: /* + /* * Overflow values can be larger than the maximum page size but still be * "on-page". If the next key/value pair is larger than space available * after a split has happened (in other words, larger than the maximum @@ -2592,66 +2548,6 @@ done: /* return (0); } -/* - * __rec_split_crossing_bnd -- - * Save the details for the minimum split size boundary or call for a - * split. - */ -static inline int -__rec_split_crossing_bnd( - WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) -{ - WT_BOUNDARY *bnd; - WT_BTREE *btree; - WT_PAGE_HEADER *dsk; - size_t min_bnd_offset; - - WT_ASSERT(session, WT_CHECK_CROSSING_BND(r, next_len)); - - /* - * If crossing the minimum split size boundary, store the boundary - * details at the current location in the buffer. If we are crossing the - * split boundary at the same time, possible when the next record is - * large enough, just split at this point. - */ - if (WT_CROSSING_MIN_BND(r, next_len) && - !WT_CROSSING_SPLIT_BND(r, next_len)) { - btree = S2BT(session); - bnd = &r->bnd[r->bnd_next]; - dsk = r->disk_image.mem; - min_bnd_offset = (WT_PTRDIFF(r->first_free, dsk) - - bnd->offset) + WT_PAGE_HEADER_BYTE_SIZE(btree); - if (min_bnd_offset == WT_PAGE_HEADER_BYTE_SIZE(btree)) - /* - * This is possible if the first record doesn't fit in - * the minimum split size, we write this record without - * setting up any boundary here. We will get the - * opportunity to setup a boundary before writing out - * the next record. - */ - return (0); - - WT_ASSERT(session, bnd->min_bnd_offset == 0); - - /* - * Hitting a page boundary resets the dictionary, in all cases. 
- */ - __rec_dictionary_reset(r); - - bnd->min_bnd_offset = min_bnd_offset; - bnd->min_bnd_entries = r->entries; - bnd->min_bnd_recno = r->recno; - if (dsk->type == WT_PAGE_ROW_INT || - dsk->type == WT_PAGE_ROW_LEAF) - WT_RET(__rec_split_row_promote( - session, r, &bnd->min_bnd_key, dsk->type)); - return (0); - } - - /* We are crossing a split boundary */ - return (__rec_split(session, r, next_len)); -} - /* * __rec_split_raw_worker -- * Handle the raw compression page reconciliation bookkeeping. @@ -2730,7 +2626,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, */ recno = WT_RECNO_OOB; if (dsk->type == WT_PAGE_COL_VAR) - recno = last->max_bnd_recno; + recno = last->recno; entry = max_image_slot = slots = 0; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { @@ -2957,7 +2853,7 @@ no_slots: */ dst->size = result_len + WT_BLOCK_COMPRESS_SKIP; dsk_dst = dst->mem; - dsk_dst->recno = last->max_bnd_recno; + dsk_dst->recno = last->recno; dsk_dst->mem_size = r->raw_offsets[result_slots] + WT_BLOCK_COMPRESS_SKIP; dsk_dst->u.entries = r->raw_entries[result_slots - 1]; @@ -2977,7 +2873,7 @@ no_slots: WT_RET(__wt_strndup(session, dsk, dsk_dst->mem_size, &last->disk_image)); disk_image = last->disk_image; - disk_image->recno = last->max_bnd_recno; + disk_image->recno = last->recno; disk_image->mem_size = dsk_dst->mem_size; disk_image->u.entries = dsk_dst->u.entries; } @@ -3007,14 +2903,14 @@ no_slots: */ switch (dsk->type) { case WT_PAGE_COL_INT: - next->max_bnd_recno = r->raw_recnos[result_slots]; + next->recno = r->raw_recnos[result_slots]; break; case WT_PAGE_COL_VAR: - next->max_bnd_recno = r->raw_recnos[result_slots - 1]; + next->recno = r->raw_recnos[result_slots - 1]; break; case WT_PAGE_ROW_INT: case WT_PAGE_ROW_LEAF: - next->max_bnd_recno = WT_RECNO_OOB; + next->recno = WT_RECNO_OOB; if (!last_block) { /* * Confirm there was uncompressed data remaining @@ -3023,7 +2919,7 @@ no_slots: */ WT_ASSERT(session, len > 0); WT_RET(__rec_split_row_promote_cell( - session, dsk, 
&next->max_bnd_key)); + session, dsk, &next->key)); } break; } @@ -3035,7 +2931,7 @@ no_slots: */ WT_STAT_DATA_INCR(session, compress_raw_fail); - dsk->recno = last->max_bnd_recno; + dsk->recno = last->recno; dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); dsk->u.entries = r->entries; r->disk_image.size = dsk->mem_size; @@ -3112,9 +3008,35 @@ __rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) static int __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) { - WT_BOUNDARY *bnd_cur, *bnd_prev; + WT_BOUNDARY *bnd; WT_PAGE_HEADER *dsk; - bool grow_bnd; + + /* Adjust the boundary information based on our split status. */ + switch (r->bnd_state) { + case SPLIT_BOUNDARY: + case SPLIT_MAX: + /* + * We never split, the reconciled page fit into a maximum page + * size. Change the first boundary slot to represent the full + * page (the first boundary slot is largely correct, just update + * the number of entries). + */ + r->bnd_next = 0; + break; + case SPLIT_TRACKING_OFF: + /* + * If we have already split, or aren't tracking boundaries, put + * the remaining data in the next boundary slot. + */ + WT_RET(__rec_split_bnd_grow(session, r)); + break; + case SPLIT_TRACKING_RAW: + /* + * We were configured for raw compression, and either we never + * wrote anything, or there's a remaindered block of data. + */ + break; + } /* * We may arrive here with no entries to write if the page was entirely @@ -3141,66 +3063,20 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) return (EBUSY); } - dsk = r->disk_image.mem; + /* Set the boundary reference and increment the count. */ + bnd = &r->bnd[r->bnd_next++]; + bnd->entries = r->entries; - /* Set the number of entries for the just finished chunk. */ - bnd_cur = &r->bnd[r->bnd_next]; - bnd_cur->max_bnd_entries = r->entries; - - grow_bnd = true; - /* - * We can reach here even with raw_compression when the last split chunk - * is too small to be sent for raw compression. 
- */ - if (!r->is_bulk_load && !r->raw_compression) { - if (WT_PTRDIFF(r->first_free, dsk) > r->page_size && - r->bnd_next != 0) { - /* - * We hold two boundaries worth of data in the buffer, - * and this data doesn't fit in a single page. If the - * last chunk is too small, readjust the boundary to a - * pre-computed minimum. - * Write out the penultimate chunk to the disk as a page - */ - WT_RET(__rec_split_write_prev_and_shift_cur( - session, r, true)); - } else - if (r->bnd_next != 0) { - /* - * We have two boundaries, but the data in the - * buffer can fit a single page. Merge the - * boundaries to create a single chunk. - */ - bnd_prev = bnd_cur - 1; - bnd_prev->max_bnd_entries += - bnd_cur->max_bnd_entries; - r->bnd_next--; - grow_bnd = false; - } - } - - /* - * We already have space for an extra boundary if we merged two - * boundaries above, in that case we do not need to grow the boundary - * structure. - */ - if (grow_bnd) - WT_RET(__rec_split_bnd_grow(session, r)); - bnd_cur = &r->bnd[r->bnd_next]; - r->bnd_next++; - - /* - * Current boundary now has all the remaining data/last page now. - * Let's write it to the disk - */ - dsk->recno = bnd_cur->max_bnd_recno; - dsk->u.entries = bnd_cur->max_bnd_entries; + /* Finalize the header information. */ + dsk = r->disk_image.mem; + dsk->recno = bnd->recno; + dsk->u.entries = r->entries; dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); r->disk_image.size = dsk->mem_size; /* If this is a checkpoint, we're done, otherwise write the page. */ - return (__rec_is_checkpoint(session, r, bnd_cur) ? - 0 : __rec_split_write(session, r, bnd_cur, &r->disk_image, true)); + return (__rec_is_checkpoint(session, r, bnd) ? + 0 : __rec_split_write(session, r, bnd, &r->disk_image, true)); } /* @@ -3233,6 +3109,98 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) return (__rec_split_finish_std(session, r)); } +/* + * __rec_split_fixup -- + * Fix up after crossing the maximum page boundary. 
+ */ +static int +__rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r) +{ + WT_BOUNDARY *bnd; + WT_BTREE *btree; + WT_DECL_ITEM(tmp); + WT_DECL_RET; + WT_PAGE_HEADER *dsk; + size_t i, len; + uint8_t *dsk_start, *p; + + /* + * When we overflow physical limits of the page, we walk the list of + * split chunks we've created and write those pages out, then update + * the caller's information. + */ + btree = S2BT(session); + + /* + * The data isn't laid out on a page boundary or nul padded; copy it to + * a clean, aligned, padded buffer before writing it. + * + * Allocate a scratch buffer to hold the new disk image. Copy the disk + * page's header and block-manager space into the scratch buffer, most + * of the header information remains unchanged between the pages. + */ + WT_RET(__wt_scr_alloc(session, r->disk_image.memsize, &tmp)); + dsk = tmp->mem; + memcpy(dsk, r->disk_image.mem, WT_PAGE_HEADER_BYTE_SIZE(btree)); + + /* + * For each split chunk we've created, update the disk image and copy + * it into place. + */ + dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); + for (i = 0, bnd = r->bnd; i < r->bnd_next; ++i, ++bnd) { + /* Copy the page contents to the temporary buffer. */ + len = (bnd + 1)->offset - bnd->offset; + memcpy(dsk_start, + (uint8_t *)r->disk_image.mem + bnd->offset, len); + + /* Finalize the header information and write the page. */ + dsk->recno = bnd->recno; + dsk->u.entries = bnd->entries; + tmp->size = WT_PAGE_HEADER_BYTE_SIZE(btree) + len; + dsk->mem_size = WT_STORE_SIZE(tmp->size); + WT_ERR(__rec_split_write(session, r, bnd, tmp, false)); + } + + /* + * There is probably a remnant in the working buffer that didn't get + * written, copy it down to the beginning of the working buffer. + * + * Confirm the remnant is no larger than a split-sized chunk, including + * header. 
We know that's the maximum sized remnant because we only have + * remnants if split switches from accumulating to a split boundary to + * accumulating to the end of the page (the other path here is when we + * hit a split boundary, there was room for another split chunk in the + * page, and the next item still wouldn't fit, in which case there is no + * remnant). So: we were accumulating to the end of the page and created + * a remnant. We know the remnant cannot be as large as a split-sized + * chunk, including header, because if there was room for that large a + * remnant, we wouldn't have switched from accumulating to a page end. + */ + p = (uint8_t *)r->disk_image.mem + bnd->offset; + len = WT_PTRDIFF(r->first_free, p); + if (len >= r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree)) + WT_PANIC_ERR(session, EINVAL, + "Reconciliation remnant too large for the split buffer"); + dsk = r->disk_image.mem; + dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); + (void)memmove(dsk_start, p, len); + + /* + * Fix up our caller's information, including updating the starting + * record number. + */ + r->entries -= r->total_entries; + r->first_free = dsk_start + len; + WT_ASSERT(session, + r->page_size >= (WT_PAGE_HEADER_BYTE_SIZE(btree) + len)); + r->space_avail = + r->split_size - (WT_PAGE_HEADER_BYTE_SIZE(btree) + len); + +err: __wt_scr_free(session, &tmp); + return (ret); +} + /* * __rec_split_write -- * Write a disk block out for the split helper functions. @@ -3270,6 +3238,8 @@ __rec_split_write(WT_SESSION_IMPL *session, F_SET(dsk, WT_PAGE_EMPTY_V_NONE); } + bnd->entries = r->entries; + /* Initialize the address (set the page type for the parent). 
*/ switch (dsk->type) { case WT_PAGE_COL_FIX: @@ -3315,8 +3285,7 @@ __rec_split_write(WT_SESSION_IMPL *session, switch (page->type) { case WT_PAGE_COL_FIX: case WT_PAGE_COL_VAR: - if (WT_INSERT_RECNO(supd->ins) >= - (bnd + 1)->max_bnd_recno) + if (WT_INSERT_RECNO(supd->ins) >= (bnd + 1)->recno) goto supd_check_complete; break; case WT_PAGE_ROW_LEAF: @@ -3327,8 +3296,8 @@ __rec_split_write(WT_SESSION_IMPL *session, key->data = WT_INSERT_KEY(supd->ins); key->size = WT_INSERT_KEY_SIZE(supd->ins); } - WT_ERR(__wt_compare(session, btree->collator, - key, &(bnd + 1)->max_bnd_key, &cmp)); + WT_ERR(__wt_compare(session, + btree->collator, key, &(bnd + 1)->key, &cmp)); if (cmp >= 0) goto supd_check_complete; break; @@ -3418,14 +3387,14 @@ supd_check_complete: #ifdef HAVE_VERBOSE /* Output a verbose message if we create a page without many entries */ - if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && - bnd->max_bnd_entries < 6) + if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && r->entries < 6) __wt_verbose(session, WT_VERB_SPLIT, "Reconciliation creating a page with %" PRIu32 " entries, memory footprint %" WT_SIZET_FMT - ", page count %" PRIu32 ", %s", bnd->max_bnd_entries, - r->page->memory_footprint, r->bnd_next, - F_ISSET(r, WT_EVICTING) ? "evict" : "checkpoint"); + ", page count %" PRIu32 ", %s, split state: %d", + r->entries, r->page->memory_footprint, r->bnd_next, + F_ISSET(r, WT_EVICTING) ? "evict" : "checkpoint", + r->bnd_state); #endif WT_ERR(__wt_bt_write(session, buf, addr, &addr_size, @@ -3711,12 +3680,11 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) cursor->value.data, cursor->value.size, (uint64_t)0)); /* Boundary: split or write the page. 
*/ - if (r->raw_compression) { - if (key->len + val->len > r->space_avail) - WT_RET(__rec_split_raw( - session, r, key->len + val->len)); - } else - if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) { + if (key->len + val->len > r->space_avail) { + if (r->raw_compression) + WT_RET( + __rec_split_raw(session, r, key->len + val->len)); + else { /* * Turn off prefix compression until a full key written * to the new page, and (unless already working with an @@ -3728,9 +3696,10 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_RET(__rec_cell_build_leaf_key( session, r, NULL, 0, &ovfl_key)); } - WT_RET(__rec_split_crossing_bnd( - session, r, key->len + val->len)); + + WT_RET(__rec_split(session, r, key->len + val->len)); } + } /* Copy the key/value pair onto the page. */ __rec_copy_incr(session, r, key); @@ -3771,10 +3740,6 @@ __rec_col_fix_bulk_insert_split_check(WT_CURSOR_BULK *cbulk) * split. * * Boundary: split or write the page. - * - * No need to have a minimum split size boundary, all - * pages are filled 100% except the last, allowing it to - * grow in the future. */ __rec_incr(session, r, cbulk->entry, __bitstr_size( @@ -3879,12 +3844,10 @@ __wt_bulk_insert_var( r, cbulk->last.data, cbulk->last.size, cbulk->rle)); /* Boundary: split or write the page. */ - if (r->raw_compression) { - if (val->len > r->space_avail) - WT_RET(__rec_split_raw(session, r, val->len)); - } else - if (WT_CROSSING_SPLIT_BND(r, val->len)) - WT_RET(__rec_split_crossing_bnd(session, r, val->len)); + if (val->len > r->space_avail) + WT_RET(r->raw_compression ? + __rec_split_raw(session, r, val->len) : + __rec_split(session, r, val->len)); /* Copy the value onto the page. */ if (btree->dictionary) @@ -4020,13 +3983,10 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) WT_CHILD_RELEASE_ERR(session, hazard, ref); /* Boundary: split or write the page. 
*/ - if (r->raw_compression) { - if (val->len > r->space_avail) - WT_ERR(__rec_split_raw(session, r, val->len)); - } else - if (WT_CHECK_CROSSING_BND(r, val->len)) - WT_ERR(__rec_split_crossing_bnd( - session, r, val->len)); + if (val->len > r->space_avail) + WT_ERR(r->raw_compression ? + __rec_split_raw(session, r, val->len) : + __rec_split(session, r, val->len)); /* Copy the value onto the page. */ __rec_copy_incr(session, r, val); @@ -4068,13 +4028,10 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) addr->addr, addr->size, __rec_vtype(addr), r->recno); /* Boundary: split or write the page. */ - if (r->raw_compression) { - if (val->len > r->space_avail) - WT_RET(__rec_split_raw(session, r, val->len)); - } else - if (WT_CHECK_CROSSING_BND(r, val->len)) - WT_RET(__rec_split_crossing_bnd( - session, r, val->len)); + if (val->len > r->space_avail) + WT_RET(r->raw_compression ? + __rec_split_raw(session, r, val->len) : + __rec_split(session, r, val->len)); /* Copy the value onto the page. */ __rec_copy_incr(session, r, val); @@ -4182,10 +4139,6 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) * split. * * Boundary: split or write the page. - * - * No need to have a minimum split size boundary, all - * pages are filled 100% except the last, allowing it to - * grow in the future. */ __rec_incr(session, r, entry, __bitstr_size((size_t)entry * btree->bitcnt)); @@ -4342,13 +4295,10 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, session, r, value->data, value->size, rle)); /* Boundary: split or write the page. */ - if (r->raw_compression) { - if (val->len > r->space_avail) - WT_RET(__rec_split_raw(session, r, val->len)); - } else - if (WT_CHECK_CROSSING_BND(r, val->len)) - WT_RET(__rec_split_crossing_bnd( - session, r, val->len)); + if (val->len > r->space_avail) + WT_RET(r->raw_compression ? 
+ __rec_split_raw(session, r, val->len) : + __rec_split(session, r, val->len)); /* Copy the value onto the page. */ if (!deleted && !overflow_type && btree->dictionary) @@ -5011,12 +4961,11 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) r->cell_zero = false; /* Boundary: split or write the page. */ - if (r->raw_compression) { - if (key->len + val->len > r->space_avail) + if (key->len + val->len > r->space_avail) { + if (r->raw_compression) WT_ERR(__rec_split_raw( session, r, key->len + val->len)); - } else - if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) { + else { /* * In one path above, we copied address blocks * from the page rather than building the actual @@ -5028,10 +4977,10 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_IKEY_DATA(ikey), ikey->size)); key_onpage_ovfl = false; } - - WT_ERR(__rec_split_crossing_bnd( + WT_ERR(__rec_split( session, r, key->len + val->len)); } + } /* Copy the key and value onto the page. */ __rec_copy_incr(session, r, key); @@ -5081,14 +5030,10 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) addr->addr, addr->size, __rec_vtype(addr), WT_RECNO_OOB); /* Boundary: split or write the page. */ - if (r->raw_compression) { - if (key->len + val->len > r->space_avail) - WT_RET(__rec_split_raw( - session, r, key->len + val->len)); - } else - if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) - WT_RET(__rec_split_crossing_bnd( - session, r, key->len + val->len)); + if (key->len + val->len > r->space_avail) + WT_RET(r->raw_compression ? + __rec_split_raw(session, r, key->len + val->len) : + __rec_split(session, r, key->len + val->len)); /* Copy the key and value onto the page. */ __rec_copy_incr(session, r, key); @@ -5417,17 +5362,16 @@ build: } /* Boundary: split or write the page. 
*/ - if (r->raw_compression) { - if (key->len + val->len > r->space_avail) + if (key->len + val->len > r->space_avail) { + if (r->raw_compression) WT_ERR(__rec_split_raw( session, r, key->len + val->len)); - } else - if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) { + else { /* - * If we copied address blocks from the page - * rather than building the actual key, we have - * to build the key now because we are about to - * promote it. + * In one path above, we copied address blocks + * from the page rather than building the actual + * key. In that case, we have to build the key + * now because we are about to promote it. */ if (key_onpage_ovfl) { WT_ERR(__wt_dsk_cell_data_ref(session, @@ -5446,13 +5390,14 @@ build: if (!ovfl_key) WT_ERR( __rec_cell_build_leaf_key( - session, r, NULL, 0, - &ovfl_key)); + session, + r, NULL, 0, &ovfl_key)); } - WT_ERR(__rec_split_crossing_bnd( + WT_ERR(__rec_split( session, r, key->len + val->len)); } + } /* Copy the key/value pair onto the page. */ __rec_copy_incr(session, r, key); @@ -5515,12 +5460,11 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins), &ovfl_key)); /* Boundary: split or write the page. */ - if (r->raw_compression) { - if (key->len + val->len > r->space_avail) + if (key->len + val->len > r->space_avail) { + if (r->raw_compression) WT_RET(__rec_split_raw( session, r, key->len + val->len)); - } else - if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) { + else { /* * Turn off prefix compression until a full key * written to the new page, and (unless already @@ -5532,13 +5476,14 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) if (!ovfl_key) WT_RET( __rec_cell_build_leaf_key( - session, r, NULL, 0, - &ovfl_key)); + session, + r, NULL, 0, &ovfl_key)); } - WT_RET(__rec_split_crossing_bnd( + WT_RET(__rec_split( session, r, key->len + val->len)); } + } /* Copy the key/value pair onto the page. 
*/ __rec_copy_incr(session, r, key); @@ -5650,14 +5595,13 @@ __rec_split_dump_keys(WT_SESSION_IMPL *session, WT_PAGE *page, WT_RECONCILE *r) __wt_verbose(session, WT_VERB_SPLIT, "starting key %s", __wt_buf_set_printable( - session, bnd->max_bnd_key.data, - bnd->max_bnd_key.size, tkey)); + session, bnd->key.data, bnd->key.size, tkey)); break; case WT_PAGE_COL_FIX: case WT_PAGE_COL_INT: case WT_PAGE_COL_VAR: __wt_verbose(session, WT_VERB_SPLIT, - "starting recno %" PRIu64, bnd->max_bnd_recno); + "starting recno %" PRIu64, bnd->recno); break; WT_ILLEGAL_VALUE_ERR(session); } @@ -5919,10 +5863,10 @@ __rec_split_row(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) /* We never set the first page's key, grab it from the original page. */ ref = r->ref; if (__wt_ref_is_root(ref)) - WT_RET(__wt_buf_set(session, &r->bnd[0].max_bnd_key, "", 1)); + WT_RET(__wt_buf_set(session, &r->bnd[0].key, "", 1)); else { __wt_ref_key(ref->home, ref, &p, &size); - WT_RET(__wt_buf_set(session, &r->bnd[0].max_bnd_key, p, size)); + WT_RET(__wt_buf_set(session, &r->bnd[0].key, p, size)); } /* Allocate, then initialize the array of replacement blocks. */ @@ -5930,8 +5874,8 @@ __rec_split_row(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) for (multi = mod->mod_multi, bnd = r->bnd, i = 0; i < r->bnd_next; ++multi, ++bnd, ++i) { - WT_RET(__wt_row_ikey_alloc(session, 0, bnd->max_bnd_key.data, - bnd->max_bnd_key.size, &multi->key.ikey)); + WT_RET(__wt_row_ikey_alloc(session, 0, + bnd->key.data, bnd->key.size, &multi->key.ikey)); /* * Copy any disk image. Don't take saved updates without a @@ -5978,7 +5922,7 @@ __rec_split_col(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) for (multi = mod->mod_multi, bnd = r->bnd, i = 0; i < r->bnd_next; ++multi, ++bnd, ++i) { - multi->key.recno = bnd->max_bnd_recno; + multi->key.recno = bnd->recno; /* * Copy any disk image. 
Don't take saved updates without a diff --git a/test/format/config.h b/test/format/config.h index b5feb7a5321..e3e1e73a786 100644 --- a/test/format/config.h +++ b/test/format/config.h @@ -284,7 +284,7 @@ static CONFIG c[] = { { "split_pct", "page split size as a percentage of the maximum page size", - 0x0, 50, 100, 100, &g.c_split_pct, NULL }, + 0x0, 40, 85, 85, &g.c_split_pct, NULL }, { "statistics", "maintain statistics", /* 20% */ -- cgit v1.2.1 From 0a0bfa94d912933bede4f2550ac34a69916cb416 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 29 Mar 2017 16:26:57 -0400 Subject: WT-3244 Turn off in-memory cache-full checks on the metadata file (#3359) This avoids metadata operations failing in in-memory configurations. --- src/btree/bt_handle.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index f2bffee06da..57e0a3422f2 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -359,6 +359,14 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) } else F_CLR(btree, WT_BTREE_IGNORE_CACHE); + /* + * The metadata isn't blocked by in-memory cache limits because metadata + * "unroll" is performed by updates that are potentially blocked by the + * cache-full checks. + */ + if (WT_IS_METADATA(btree->dhandle)) + F_SET(btree, WT_BTREE_IGNORE_CACHE); + WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval)); if (cval.val) F_CLR(btree, WT_BTREE_NO_LOGGING); -- cgit v1.2.1 From 2874db3364248da2e96ca0bde45fa08482445b57 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Thu, 30 Mar 2017 07:38:32 +1100 Subject: WT-3208 Don't count page rewrites as eviction making progress. 
(#3356) --- src/btree/bt_discard.c | 33 +++++++++++++++++++++++++++------ src/btree/bt_split.c | 2 +- src/include/btree.i | 12 ++++++++++-- src/include/extern.h | 1 + 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index d2beb84fee9..bab7b8145d6 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -16,13 +16,14 @@ static void __free_skip_array( WT_SESSION_IMPL *, WT_INSERT_HEAD **, uint32_t, bool); static void __free_skip_list(WT_SESSION_IMPL *, WT_INSERT *, bool); static void __free_update(WT_SESSION_IMPL *, WT_UPDATE **, uint32_t, bool); +static void __page_out_int(WT_SESSION_IMPL *, WT_PAGE **, bool); /* - * __wt_ref_out -- + * __wt_ref_out_int -- * Discard an in-memory page, freeing all memory associated with it. */ void -__wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) +__wt_ref_out_int(WT_SESSION_IMPL *session, WT_REF *ref, bool rewrite) { /* * A version of the page-out function that allows us to make additional @@ -56,15 +57,25 @@ __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) } #endif - __wt_page_out(session, &ref->page); + __page_out_int(session, &ref->page, rewrite); } /* - * __wt_page_out -- + * __wt_ref_out -- * Discard an in-memory page, freeing all memory associated with it. */ void -__wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) +__wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) +{ + __wt_ref_out_int(session, ref, false); +} + +/* + * __page_out_int -- + * Discard an in-memory page, freeing all memory associated with it. + */ +static void +__page_out_int(WT_SESSION_IMPL *session, WT_PAGE **pagep, bool rewrite) { WT_PAGE *page; WT_PAGE_HEADER *dsk; @@ -103,7 +114,7 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) } /* Update the cache's information. 
*/ - __wt_cache_page_evict(session, page); + __wt_cache_page_evict(session, page, rewrite); dsk = (WT_PAGE_HEADER *)page->dsk; if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC)) @@ -147,6 +158,16 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) __wt_overwrite_and_free(session, page); } +/* + * __wt_page_out -- + * Discard an in-memory page, freeing all memory associated with it. + */ +void +__wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) +{ + __page_out_int(session, pagep, false); +} + /* * __free_page_modify -- * Discard the page's associated modification structures. diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index b1bad760826..49043c8bab4 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -2274,7 +2274,7 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi) * reconciliation, do it now. */ __wt_page_modify_clear(session, page); - __wt_ref_out(session, ref); + __wt_ref_out_int(session, ref, true); /* Swap the new page into place. */ ref->page = new->page; diff --git a/src/include/btree.i b/src/include/btree.i index eefc2db075d..a4d88d5fda1 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -413,7 +413,7 @@ __wt_cache_page_image_incr(WT_SESSION_IMPL *session, uint32_t size) * Evict pages from the cache. */ static inline void -__wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) +__wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page, bool rewrite) { WT_BTREE *btree; WT_CACHE *cache; @@ -456,7 +456,15 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) /* Update pages and bytes evicted. */ (void)__wt_atomic_add64(&cache->bytes_evict, page->memory_footprint); - (void)__wt_atomic_addv64(&cache->pages_evict, 1); + + /* + * Don't count rewrites as eviction: there's no guarantee we are making + * real progress. 
+ */ + if (rewrite) + (void)__wt_atomic_subv64(&cache->pages_inmem, 1); + else + (void)__wt_atomic_addv64(&cache->pages_evict, 1); } /* diff --git a/src/include/extern.h b/src/include/extern.h index a7eb4b491a9..2759ac1dec3 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -126,6 +126,7 @@ extern int __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) extern void __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_ref_out_int(WT_SESSION_IMPL *session, WT_REF *ref, bool rewrite) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_free_ref( WT_SESSION_IMPL *session, WT_REF *ref, int page_type, bool free_pages) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -- cgit v1.2.1 From f379b4be6881ebda712f79053b6dc1e13938e59a Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 29 Mar 2017 17:15:25 -0400 Subject: WT-3155 Remove WT_CONN_SERVER_RUN flag (#3344) Set WT_CONN_CLOSING earlier in the connection close process (before calling the async close functions). This requires removing the assert in btree handle open that close hasn't yet been called. Add a barrier after setting the connection close flag to ensure the write is flushed. 
LSM workers checked both the WT_CONN_SERVER_RUN and WT_LSM_WORKER_RUN flags because the LSM destroy path (__lsm_manager_worker_shutdown) didn't clear the WT_LSM_WORKER_RUN flag. Add that clear, change __lsm_worker to only check WT_LSM_WORKER_RUN. Previously, the LSM manager checked the WT_CONN_SERVER_RUN flag in the LSM destroy path and connection shutdown waited on the LSM manager to stop and clear WT_CONN_SERVER_LSM. Flip that process: the LSM shutdown path now clears WT_CONN_SERVER_LSM, and the LSM manager stops when it sees WT_CONN_SERVER_LSM is cleared. The LSM manager sets a new flag, WT_LSM_MANAGER_SHUTDOWN, when it's stopped, and the shutdown process waits on that new flag. Add memory barriers to the thread create and join functions. WiredTiger typically sets (clears) state and expects threads to see the state and start (stop). It's simpler and safer if we imply a barrier in the thread API. * Rename WT_CONN_LOG_SERVER_RUN to WT_CONN_SERVER_LOG to match the other server flags. * Once the async and LSM servers have exited, assert no more files are opened. * Instead of using a barrier to ensure the worker run state isn't cached, declare the structure field volatile. Use a stand-alone structure field instead of a set of flags; it's a simpler "volatile" story. * In one of two places, when shutting down worker threads, we signalled the condition variable to wake the worker thread. For consistency, remove the signal (we're only sleeping for a 100th of a second, the wake isn't buying us anything). * Restore the assertion in __open_session() that we're not in the "closing" path: returning an error is more dangerous; it might cause a thread to panic, and then we have a panic racing with the close. * A wt_thread_t (POSIX pthread_t) is an opaque type, and can't be assigned to 0 or tested against an integral value portably. Add a bool WT_LSM_WORKER_ARGS.tid_set field instead of assigning or testing the wt_thread_t.
We already have an __wt_lsm_start function, add a __wt_lsm_stop function and move the setting/clearing of the WT_LSM_WORKER_ARGS.{running,tid_set} fields into those functions so we ensure the ordering is correct. --- dist/flags.py | 4 ++-- src/conn/conn_dhandle.c | 3 ++- src/conn/conn_log.c | 12 +++++------ src/conn/conn_open.c | 19 ++++++++++------- src/include/extern.h | 1 + src/include/flags.h | 14 ++++++------ src/include/lsm.h | 10 +++++++-- src/lsm/lsm_manager.c | 54 +++++++++++++++++++++++++---------------------- src/lsm/lsm_worker.c | 23 +++++++++++++++----- src/os_posix/os_thread.c | 14 ++++++++++++ src/os_win/os_thread.c | 14 ++++++++++++ src/session/session_api.c | 4 ++-- 12 files changed, 114 insertions(+), 58 deletions(-) diff --git a/dist/flags.py b/dist/flags.py index b20a7181532..64b5d789e72 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -96,19 +96,19 @@ flags = { 'CONN_CACHE_POOL', 'CONN_CKPT_SYNC', 'CONN_CLOSING', + 'CONN_CLOSING_NO_MORE_OPENS', 'CONN_EVICTION_RUN', 'CONN_IN_MEMORY', 'CONN_LAS_OPEN', 'CONN_LEAK_MEMORY', - 'CONN_LOG_SERVER_RUN', 'CONN_LSM_MERGE', 'CONN_PANIC', 'CONN_READONLY', 'CONN_RECOVERING', 'CONN_SERVER_ASYNC', 'CONN_SERVER_CHECKPOINT', + 'CONN_SERVER_LOG', 'CONN_SERVER_LSM', - 'CONN_SERVER_RUN', 'CONN_SERVER_STATISTICS', 'CONN_SERVER_SWEEP', 'CONN_WAS_BACKUP', diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index c5480897494..657cdebf7ee 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -314,7 +314,8 @@ __wt_conn_btree_open( F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) && !LF_ISSET(WT_DHANDLE_LOCK_ONLY)); - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_CLOSING)); + WT_ASSERT(session, + !F_ISSET(S2C(session), WT_CONN_CLOSING_NO_MORE_OPENS)); /* * If the handle is already open, it has to be closed so it can be diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index c6dd795389d..b8b5bd2a908 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -341,7 +341,7 @@ 
__wt_log_truncate_files( conn = S2C(session); if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) return (0); - if (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN) && + if (F_ISSET(conn, WT_CONN_SERVER_LOG) && FLD_ISSET(conn->log_flags, WT_CONN_LOG_ARCHIVE)) WT_RET_MSG(session, EINVAL, "Attempt to archive manually while a server is running"); @@ -382,7 +382,7 @@ __log_file_server(void *arg) conn = S2C(session); log = conn->log; locked = false; - while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) { + while (F_ISSET(conn, WT_CONN_SERVER_LOG)) { /* * If there is a log file to close, make sure any outstanding * write operations have completed, then fsync and close it. @@ -708,7 +708,7 @@ __log_wrlsn_server(void *arg) log = conn->log; yield = 0; WT_INIT_LSN(&prev); - while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) { + while (F_ISSET(conn, WT_CONN_SERVER_LOG)) { /* * Write out any log record buffers if anything was done * since last time. Only call the function to walk the @@ -783,7 +783,7 @@ __log_server(void *arg) * takes to sync out an earlier file. */ did_work = true; - while (F_ISSET(conn, WT_CONN_LOG_SERVER_RUN)) { + while (F_ISSET(conn, WT_CONN_SERVER_LOG)) { /* * Slots depend on future activity. Force out buffered * writes in case we are idle. This cannot be part of the @@ -923,7 +923,7 @@ __wt_logmgr_open(WT_SESSION_IMPL *session) if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) return (0); - F_SET(conn, WT_CONN_LOG_SERVER_RUN); + F_SET(conn, WT_CONN_SERVER_LOG); /* * Start the log close thread. It is not configurable. 
@@ -995,7 +995,7 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) conn = S2C(session); - F_CLR(conn, WT_CONN_LOG_SERVER_RUN); + F_CLR(conn, WT_CONN_SERVER_LOG); if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) { /* diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c index 5b20377d437..eb3c79422a0 100644 --- a/src/conn/conn_open.c +++ b/src/conn/conn_open.c @@ -21,12 +21,6 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) session = conn->default_session; WT_ASSERT(session, session->iface.connection == &conn->iface); - /* - * Tell internal server threads to run: this must be set before opening - * any sessions. - */ - F_SET(conn, WT_CONN_SERVER_RUN); - /* WT_SESSION_IMPL array. */ WT_RET(__wt_calloc(session, conn->session_size, sizeof(WT_SESSION_IMPL), &conn->sessions)); @@ -100,6 +94,10 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) __wt_yield(); } + /* Shut down the subsystems, ensuring workers see the state change. */ + F_SET(conn, WT_CONN_CLOSING); + WT_FULL_BARRIER(); + /* * Clear any pending async operations and shut down the async worker * threads and system before closing LSM. @@ -113,10 +111,15 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) * btree handles, so take care in ordering shutdown to make sure they * exit before files are closed. */ - F_CLR(conn, WT_CONN_SERVER_RUN); WT_TRET(__wt_lsm_manager_destroy(session)); - F_SET(conn, WT_CONN_CLOSING); + /* + * Once the async and LSM threads exit, we shouldn't be opening any + * more files. 
+ */ + F_SET(conn, WT_CONN_CLOSING_NO_MORE_OPENS); + WT_FULL_BARRIER(); + WT_TRET(__wt_checkpoint_server_destroy(session)); WT_TRET(__wt_statlog_destroy(session, true)); WT_TRET(__wt_sweep_destroy(session)); diff --git a/src/include/extern.h b/src/include/extern.h index 2759ac1dec3..47b4e03a7b7 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -458,6 +458,7 @@ extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_apply_all(WT_SESSION_IMPL *session, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_checkpoint(WT_SESSION_IMPL *session, const char *fname, const char *checkpoint, WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_checkpoint_last_name( WT_SESSION_IMPL *session, const char *fname, const char **namep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/flags.h b/src/include/flags.h index 
c1fff920e3b..f26a45c68f5 100644 --- a/src/include/flags.h +++ b/src/include/flags.h @@ -6,19 +6,19 @@ #define WT_CONN_CACHE_POOL 0x00000001 #define WT_CONN_CKPT_SYNC 0x00000002 #define WT_CONN_CLOSING 0x00000004 -#define WT_CONN_EVICTION_RUN 0x00000008 -#define WT_CONN_IN_MEMORY 0x00000010 -#define WT_CONN_LAS_OPEN 0x00000020 -#define WT_CONN_LEAK_MEMORY 0x00000040 -#define WT_CONN_LOG_SERVER_RUN 0x00000080 +#define WT_CONN_CLOSING_NO_MORE_OPENS 0x00000008 +#define WT_CONN_EVICTION_RUN 0x00000010 +#define WT_CONN_IN_MEMORY 0x00000020 +#define WT_CONN_LAS_OPEN 0x00000040 +#define WT_CONN_LEAK_MEMORY 0x00000080 #define WT_CONN_LSM_MERGE 0x00000100 #define WT_CONN_PANIC 0x00000200 #define WT_CONN_READONLY 0x00000400 #define WT_CONN_RECOVERING 0x00000800 #define WT_CONN_SERVER_ASYNC 0x00001000 #define WT_CONN_SERVER_CHECKPOINT 0x00002000 -#define WT_CONN_SERVER_LSM 0x00004000 -#define WT_CONN_SERVER_RUN 0x00008000 +#define WT_CONN_SERVER_LOG 0x00004000 +#define WT_CONN_SERVER_LSM 0x00008000 #define WT_CONN_SERVER_STATISTICS 0x00010000 #define WT_CONN_SERVER_SWEEP 0x00020000 #define WT_CONN_WAS_BACKUP 0x00040000 diff --git a/src/include/lsm.h b/src/include/lsm.h index 2bbb813bad2..e3f6897ef9d 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -23,11 +23,14 @@ struct __wt_lsm_worker_cookie { struct __wt_lsm_worker_args { WT_SESSION_IMPL *session; /* Session */ WT_CONDVAR *work_cond; /* Owned by the manager */ + wt_thread_t tid; /* Thread id */ + bool tid_set; /* Thread id set */ + u_int id; /* My manager slot id */ uint32_t type; /* Types of operations handled */ -#define WT_LSM_WORKER_RUN 0x01 - uint32_t flags; /* Worker flags */ + + volatile bool running; /* Worker is running */ }; /* @@ -162,6 +165,9 @@ struct __wt_lsm_manager { #define WT_LSM_MAX_WORKERS 20 #define WT_LSM_MIN_WORKERS 3 WT_LSM_WORKER_ARGS lsm_worker_cookies[WT_LSM_MAX_WORKERS]; + +#define WT_LSM_MANAGER_SHUTDOWN 0x01 /* Manager has shut down */ + uint32_t flags; }; /* diff --git 
a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 6dc06146179..e33e119aa41 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -89,7 +89,6 @@ __lsm_general_worker_start(WT_SESSION_IMPL *session) if (manager->lsm_workers % 2 == 0) FLD_SET(worker_args->type, WT_LSM_WORK_MERGE); } - F_SET(worker_args, WT_LSM_WORKER_RUN); WT_RET(__wt_lsm_worker_start(session, worker_args)); } @@ -129,17 +128,13 @@ __lsm_stop_workers(WT_SESSION_IMPL *session) manager->lsm_workers--) { worker_args = &manager->lsm_worker_cookies[manager->lsm_workers - 1]; - /* - * Clear this worker's flag so it stops. - */ - F_CLR(worker_args, WT_LSM_WORKER_RUN); - WT_ASSERT(session, worker_args->tid != 0); - WT_RET(__wt_thread_join(session, worker_args->tid)); - worker_args->tid = 0; + WT_ASSERT(session, worker_args->tid_set); + + WT_RET(__wt_lsm_worker_stop(session, worker_args)); worker_args->type = 0; - worker_args->flags = 0; + /* - * We do not clear the session because they are allocated + * We do not clear the other fields because they are allocated * statically when the connection was opened. */ } @@ -237,12 +232,12 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session) manager->lsm_worker_cookies[i].session = worker_session; } + F_SET(conn, WT_CONN_SERVER_LSM); + /* Start the LSM manager thread. */ WT_ERR(__wt_thread_create(session, &manager->lsm_worker_cookies[0].tid, __lsm_worker_manager, &manager->lsm_worker_cookies[0])); - F_SET(conn, WT_CONN_SERVER_LSM); - if (0) { err: for (i = 0; (worker_session = @@ -289,13 +284,18 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) manager = &conn->lsm_manager; removed = 0; + /* + * Clear the LSM server flag and flush to ensure running threads see + * the state change. + */ + F_CLR(conn, WT_CONN_SERVER_LSM); + WT_FULL_BARRIER(); + WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) || manager->lsm_workers == 0); if (manager->lsm_workers > 0) { - /* - * Stop the main LSM manager thread first. 
- */ - while (F_ISSET(conn, WT_CONN_SERVER_LSM)) + /* Wait for the main LSM manager thread to finish. */ + while (!F_ISSET(manager, WT_LSM_MANAGER_SHUTDOWN)) __wt_yield(); /* Clean up open LSM handles. */ @@ -303,7 +303,6 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) WT_TRET(__wt_thread_join( session, manager->lsm_worker_cookies[0].tid)); - manager->lsm_worker_cookies[0].tid = 0; /* Release memory from any operations left on the queue. */ while ((current = TAILQ_FIRST(&manager->switchqh)) != NULL) { @@ -342,7 +341,7 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) /* * __lsm_manager_worker_shutdown -- - * Shutdown the LSM manager and worker threads. + * Shutdown the LSM worker threads. */ static int __lsm_manager_worker_shutdown(WT_SESSION_IMPL *session) @@ -354,14 +353,13 @@ __lsm_manager_worker_shutdown(WT_SESSION_IMPL *session) manager = &S2C(session)->lsm_manager; /* - * Wait for the rest of the LSM workers to shutdown. Stop at index + * Wait for the rest of the LSM workers to shutdown. Start at index * one - since we (the manager) are at index 0. 
*/ for (i = 1; i < manager->lsm_workers; i++) { - WT_ASSERT(session, manager->lsm_worker_cookies[i].tid != 0); - __wt_cond_signal(session, manager->work_cond); - WT_TRET(__wt_thread_join( - session, manager->lsm_worker_cookies[i].tid)); + WT_ASSERT(session, manager->lsm_worker_cookies[i].tid_set); + WT_TRET(__wt_lsm_worker_stop( + session, &manager->lsm_worker_cookies[i])); } return (ret); } @@ -383,7 +381,7 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) conn = S2C(session); dhandle_locked = false; - while (F_ISSET(conn, WT_CONN_SERVER_RUN)) { + while (F_ISSET(conn, WT_CONN_SERVER_LSM)) { __wt_sleep(0, 10000); if (TAILQ_EMPTY(&conn->lsmqh)) continue; @@ -469,11 +467,13 @@ static WT_THREAD_RET __lsm_worker_manager(void *arg) { WT_DECL_RET; + WT_LSM_MANAGER *manager; WT_LSM_WORKER_ARGS *cookie; WT_SESSION_IMPL *session; cookie = (WT_LSM_WORKER_ARGS *)arg; session = cookie->session; + manager = &S2C(session)->lsm_manager; WT_ERR(__lsm_general_worker_start(session)); WT_ERR(__lsm_manager_run_server(session)); @@ -482,7 +482,11 @@ __lsm_worker_manager(void *arg) if (ret != 0) { err: WT_PANIC_MSG(session, ret, "LSM worker manager thread error"); } - F_CLR(S2C(session), WT_CONN_SERVER_LSM); + + /* Connection close waits on us to shutdown, let it know we're done. 
*/ + F_SET(manager, WT_LSM_MANAGER_SHUTDOWN); + WT_FULL_BARRIER(); + return (WT_THREAD_RET_VALUE); } diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index ffa00c0a5e7..1cabbd4888d 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -21,7 +21,23 @@ __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) { __wt_verbose(session, WT_VERB_LSM_MANAGER, "Start LSM worker %u type %#" PRIx32, args->id, args->type); - return (__wt_thread_create(session, &args->tid, __lsm_worker, args)); + + args->running = true; + WT_RET(__wt_thread_create(session, &args->tid, __lsm_worker, args)); + args->tid_set = true; + return (0); +} + +/* + * __wt_lsm_worker_stop -- + * A wrapper around the LSM worker thread stop. + */ +int +__wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) +{ + args->running = false; + args->tid_set = false; + return (__wt_thread_join(session, args->tid)); } /* @@ -84,7 +100,6 @@ err: __wt_lsm_manager_free_work_unit(session, entry); static WT_THREAD_RET __lsm_worker(void *arg) { - WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_LSM_WORK_UNIT *entry; WT_LSM_WORKER_ARGS *cookie; @@ -93,11 +108,9 @@ __lsm_worker(void *arg) cookie = (WT_LSM_WORKER_ARGS *)arg; session = cookie->session; - conn = S2C(session); entry = NULL; - while (F_ISSET(conn, WT_CONN_SERVER_RUN) && - F_ISSET(cookie, WT_LSM_WORKER_RUN)) { + while (cookie->running) { progress = false; /* diff --git a/src/os_posix/os_thread.c b/src/os_posix/os_thread.c index 85d43f10a33..18e4c347436 100644 --- a/src/os_posix/os_thread.c +++ b/src/os_posix/os_thread.c @@ -18,6 +18,13 @@ __wt_thread_create(WT_SESSION_IMPL *session, { WT_DECL_RET; + /* + * Creating a thread isn't a memory barrier, but WiredTiger commonly + * sets flags and or state and then expects worker threads to start. + * Include a barrier to ensure safety in those cases. + */ + WT_FULL_BARRIER(); + /* Spawn a new thread of control. 
*/ WT_SYSCALL_RETRY(pthread_create(tidret, NULL, func, arg), ret); if (ret == 0) @@ -34,6 +41,13 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) { WT_DECL_RET; + /* + * Joining a thread isn't a memory barrier, but WiredTiger commonly + * sets flags and or state and then expects worker threads to halt. + * Include a barrier to ensure safety in those cases. + */ + WT_FULL_BARRIER(); + WT_SYSCALL(pthread_join(tid, NULL), ret); if (ret == 0) return (0); diff --git a/src/os_win/os_thread.c b/src/os_win/os_thread.c index 7442fb08a36..4c8f212bb4f 100644 --- a/src/os_win/os_thread.c +++ b/src/os_win/os_thread.c @@ -16,6 +16,13 @@ int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) { + /* + * Creating a thread isn't a memory barrier, but WiredTiger commonly + * sets flags and or state and then expects worker threads to start. + * Include a barrier to ensure safety in those cases. + */ + WT_FULL_BARRIER(); + /* Spawn a new thread of control. */ *tidret = (HANDLE)_beginthreadex(NULL, 0, func, arg, 0, NULL); if (*tidret != 0) @@ -33,6 +40,13 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) { DWORD windows_error; + /* + * Joining a thread isn't a memory barrier, but WiredTiger commonly + * sets flags and or state and then expects worker threads to halt. + * Include a barrier to ensure safety in those cases. 
+ */ + WT_FULL_BARRIER(); + if ((windows_error = WaitForSingleObject(tid, INFINITE)) != WAIT_OBJECT_0) { if (windows_error == WAIT_FAILED) diff --git a/src/session/session_api.c b/src/session/session_api.c index 51233e5e224..b7daf0e2e02 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -1502,7 +1502,7 @@ __transaction_sync_run_chk(WT_SESSION_IMPL *session) conn = S2C(session); - return (FLD_ISSET(conn->flags, WT_CONN_LOG_SERVER_RUN)); + return (FLD_ISSET(conn->flags, WT_CONN_SERVER_LOG)); } /* @@ -1812,7 +1812,7 @@ __open_session(WT_CONNECTION_IMPL *conn, * closes the connection. This is particularly intended to catch * cases where server threads open sessions. */ - WT_ASSERT(session, F_ISSET(conn, WT_CONN_SERVER_RUN)); + WT_ASSERT(session, !F_ISSET(conn, WT_CONN_CLOSING)); /* Find the first inactive session slot. */ for (session_ret = conn->sessions, -- cgit v1.2.1 From 478c69bde8244349bf2b41505a877889fef3c500 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 30 Mar 2017 01:39:11 -0400 Subject: WT-2439 Enhance reconciliation page layout (#3358) * Set minimum split pct to 50. * The leaf-page value dictionary stores cell offsets in the disk image, which implies a dictionary reset any time we hit a boundary or grow the disk image buffer. Recent changes broke that, we weren't resetting the dictionary when the disk image buffer was resized. Instead of clearing the dictionary on buffer resize, switch to using cell offsets in the dictionary instead of cell pointers. It's unlikely to be a big win for many workloads, but it might help some, and it's cleaner than resetting the dictionary more often. Add a verify of disk images we don't write: the I/O routines verify any image we write, but we need to verify any image we create. 
--- dist/api_data.py | 4 +- src/btree/bt_handle.c | 9 +- src/config/config_def.c | 16 +- src/include/btree.h | 6 + src/include/wiredtiger.in | 4 +- src/reconcile/rec_write.c | 1029 +++++++++++++++++++++++++-------------------- test/format/config.h | 2 +- 7 files changed, 591 insertions(+), 479 deletions(-) diff --git a/dist/api_data.py b/dist/api_data.py index 1d669fa7fe0..22600dd5e29 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -295,12 +295,12 @@ file_config = format_meta + file_runtime_config + [ Config('split_deepen_per_child', '0', r''' entries allocated per child when deepening the tree''', type='int', undoc=True), - Config('split_pct', '75', r''' + Config('split_pct', '90', r''' the Btree page split size as a percentage of the maximum Btree page size, that is, when a Btree page is split, it will be split into smaller pages, where each page is the specified percentage of the maximum Btree page size''', - min='25', max='100'), + min='50', max='100'), ] # File metadata, including both configurable and non-configurable (internal) diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 57e0a3422f2..d76720b19ae 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -788,9 +788,16 @@ __btree_page_sizes(WT_SESSION_IMPL *session) * Get the split percentage (reconciliation splits pages into smaller * than the maximum page size chunks so we don't split every time a * new entry is added). Determine how large newly split pages will be. + * Set to the minimum, if the read value is less than that. 
*/ WT_RET(__wt_config_gets(session, cfg, "split_pct", &cval)); - btree->split_pct = (int)cval.val; + if (cval.val < WT_BTREE_MIN_SPLIT_PCT) { + btree->split_pct = WT_BTREE_MIN_SPLIT_PCT; + WT_RET(__wt_msg(session, + "Re-setting split_pct for %s to the minimum allowed of " + "%d%%.", session->dhandle->name, WT_BTREE_MIN_SPLIT_PCT)); + } else + btree->split_pct = (int)cval.val; intl_split_size = __wt_split_page_size(btree, btree->maxintlpage); leaf_split_size = __wt_split_page_size(btree, btree->maxleafpage); diff --git a/src/config/config_def.c b/src/config/config_def.c index b11a8d63fdb..f152fbacad4 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -294,7 +294,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = { { "source", "string", NULL, NULL, NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, { "type", "string", NULL, NULL, NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, @@ -466,7 +466,7 @@ static const WT_CONFIG_CHECK confchk_file_config[] = { { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, NULL, 0 }, @@ -530,7 +530,7 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = { { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, NULL, 0 
}, @@ -614,7 +614,7 @@ static const WT_CONFIG_CHECK confchk_lsm_meta[] = { { "prefix_compression_min", "int", NULL, "min=0", NULL, 0 }, { "split_deepen_min_child", "int", NULL, NULL, NULL, 0 }, { "split_deepen_per_child", "int", NULL, NULL, NULL, 0 }, - { "split_pct", "int", NULL, "min=25,max=100", NULL, 0 }, + { "split_pct", "int", NULL, "min=50,max=100", NULL, 0 }, { "value_format", "format", __wt_struct_confchk, NULL, NULL, 0 }, @@ -1119,7 +1119,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "chunk_size=10MB,merge_max=15,merge_min=0),memory_page_max=5MB," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,source=,split_deepen_min_child=0," - "split_deepen_per_child=0,split_pct=75,type=file,value_format=u", + "split_deepen_per_child=0,split_pct=90,type=file,value_format=u", confchk_WT_SESSION_create, 42 }, { "WT_SESSION.drop", @@ -1213,7 +1213,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "leaf_value_max=0,log=(enabled=true),memory_page_max=5MB," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,split_deepen_min_child=0," - "split_deepen_per_child=0,split_pct=75,value_format=u", + "split_deepen_per_child=0,split_pct=90,value_format=u", confchk_file_config, 35 }, { "file.meta", @@ -1228,7 +1228,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=0," "log=(enabled=true),memory_page_max=5MB,os_cache_dirty_max=0," "os_cache_max=0,prefix_compression=false,prefix_compression_min=4" - ",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75," + ",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90," "value_format=u,version=(major=0,minor=0)", confchk_file_meta, 39 }, @@ -1253,7 +1253,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { "merge_min=0),memory_page_max=5MB,old_chunks=," "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false," "prefix_compression_min=4,split_deepen_min_child=0," - 
"split_deepen_per_child=0,split_pct=75,value_format=u", + "split_deepen_per_child=0,split_pct=90,value_format=u", confchk_lsm_meta, 39 }, { "table.meta", diff --git a/src/include/btree.h b/src/include/btree.h index 88312f408cc..28fe1b94b23 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -57,6 +57,12 @@ /* Evict pages if we see this many consecutive deleted records. */ #define WT_BTREE_DELETE_THRESHOLD 1000 +/* + * Minimum size of the chunks (in percentage of the page size) a page gets split + * into during reconciliation. + */ +#define WT_BTREE_MIN_SPLIT_PCT 50 + /* * WT_BTREE -- * A btree handle. diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 558e93d3de0..707159ef6ae 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1242,8 +1242,8 @@ struct __wt_session { * @config{split_pct, the Btree page split size as a percentage of the * maximum Btree page size\, that is\, when a Btree page is split\, it * will be split into smaller pages\, where each page is the specified - * percentage of the maximum Btree page size., an integer between 25 and - * 100; default \c 75.} + * percentage of the maximum Btree page size., an integer between 50 and + * 100; default \c 90.} * @config{type, set the type of data source used to store a column * group\, index or simple table. By default\, a \c "file:" URI is * derived from the object name. 
The \c type configuration can be used diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 23f654caa70..6f95b84d292 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -26,6 +26,11 @@ typedef struct { uint32_t flags; /* Caller's configuration */ WT_ITEM disk_image; /* Temporary disk-image buffer */ + /* + * Temporary buffer used to write out a disk image when managing two + * chunks worth of data in memory + */ + WT_ITEM *interim_buf; /* * Track start/stop write generation to decide if all changes to the @@ -127,6 +132,7 @@ typedef struct { * repeatedly split a packed page. */ uint32_t split_size; /* Split page size */ + uint32_t min_split_size; /* Minimum split page size */ /* * The problem with splits is we've done a lot of work by the time we @@ -151,16 +157,6 @@ typedef struct { */ size_t offset; /* Split's first byte */ - /* - * The recno and entries fields are the starting record number - * of the split chunk (for column-store splits), and the number - * of entries in the split chunk. These fields are used both - * to write the split chunk, and to create a new internal page - * to reference the split pages. - */ - uint64_t recno; /* Split's starting record */ - uint32_t entries; /* Split's entries */ - WT_ADDR addr; /* Split's written location */ uint32_t size; /* Split's size */ uint32_t checksum; /* Split's checksum */ @@ -182,39 +178,42 @@ typedef struct { size_t supd_allocated; /* + * While reconciling pages, at any given time, we maintain two + * split chunks in the memory to be written out as pages. As we + * get to the last two chunks, if the last one turns out to be + * smaller than the minimum split size, we go back into the + * penultimate chunk and split at this minimum split size + * boundary. This moves some data from the penultimate chunk to + * the last chunk, hence increasing the size of the last page + * written without decreasing the penultimate page size beyond + * the minimum split size. 
For this reason, we maintain both a + * maximum split percentage boundary and a minimum split + * percentage boundary. + * + * The recno and entries fields are the starting record number + * of the split chunk (for column-store splits), and the number + * of entries in the split chunk. These fields are used both to + * write the split chunk, and to create a new internal page to + * reference the split pages. + * * The key for a row-store page; no column-store key is needed * because the page's recno, stored in the recno field, is the * column-store key. */ - WT_ITEM key; /* Promoted row-store key */ + uint32_t max_bnd_entries; + uint64_t max_bnd_recno; + WT_ITEM max_bnd_key; + + size_t min_bnd_offset; + uint32_t min_bnd_entries; + uint64_t min_bnd_recno; + WT_ITEM min_bnd_key; } *bnd; /* Saved boundaries */ uint32_t bnd_next; /* Next boundary slot */ uint32_t bnd_next_max; /* Maximum boundary slots used */ size_t bnd_entries; /* Total boundary slots */ size_t bnd_allocated; /* Bytes allocated */ - /* - * We track the total number of page entries copied into split chunks - * so we can easily figure out how many entries in the current split - * chunk. - */ - uint32_t total_entries; /* Total entries in splits */ - - /* - * And there's state information as to where in this process we are: - * (1) tracking split boundaries because we can still fit more split - * chunks into the maximum page size, (2) tracking the maximum page - * size boundary because we can't fit any more split chunks into the - * maximum page size, (3) not performing boundary checks because it's - * either not useful with the current page size configuration, or - * because we've already been forced to split. 
- */ - enum { SPLIT_BOUNDARY=0, /* Next: a split page boundary */ - SPLIT_MAX=1, /* Next: the maximum page boundary */ - SPLIT_TRACKING_OFF=2, /* No boundary checks */ - SPLIT_TRACKING_RAW=3 } /* Underlying compression decides */ - bnd_state; - /* * We track current information about the current record number, the * number of entries copied into the temporary buffer, where we are @@ -226,6 +225,8 @@ typedef struct { uint32_t entries; /* Current number of entries */ uint8_t *first_free; /* Current first free byte */ size_t space_avail; /* Remaining space in this chunk */ + /* Remaining space in this chunk to put a minimum size boundary */ + size_t min_space_avail; /* * Saved update list, supporting the WT_EVICT_UPDATE_RESTORE and @@ -247,15 +248,14 @@ typedef struct { /* * WT_DICTIONARY -- - * We optionally build a dictionary of row-store values for leaf - * pages. Where two value cells are identical, only write the value - * once, the second and subsequent copies point to the original cell. - * The dictionary is fixed size, but organized in a skip-list to make - * searches faster. + * We optionally build a dictionary of values for leaf pages. Where + * two value cells are identical, only write the value once, the second + * and subsequent copies point to the original cell. The dictionary is + * fixed size, but organized in a skip-list to make searches faster. 
*/ struct __rec_dictionary { uint64_t hash; /* Hash value */ - void *cell; /* Matching cell */ + uint32_t offset; /* Matching cell */ u_int depth; /* Skiplist */ WT_DICTIONARY *next[0]; @@ -293,6 +293,13 @@ typedef struct { uint32_t tested_ref_state; /* Debugging information */ } WT_RECONCILE; +#define WT_CROSSING_MIN_BND(r, next_len) \ + ((r)->bnd[(r)->bnd_next].min_bnd_offset == 0 && \ + (next_len) > (r)->min_space_avail) +#define WT_CROSSING_SPLIT_BND(r, next_len) ((next_len) > (r)->space_avail) +#define WT_CHECK_CROSSING_BND(r, next_len) \ + (WT_CROSSING_MIN_BND(r, next_len) || WT_CROSSING_SPLIT_BND(r, next_len)) + static void __rec_bnd_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *, bool); static void __rec_cell_build_addr(WT_SESSION_IMPL *, WT_RECONCILE *, const void *, size_t, u_int, uint64_t); @@ -314,6 +321,7 @@ static int __rec_col_var(WT_SESSION_IMPL *, static int __rec_col_var_helper(WT_SESSION_IMPL *, WT_RECONCILE *, WT_SALVAGE_COOKIE *, WT_ITEM *, bool, uint8_t, uint64_t); static int __rec_destroy_session(WT_SESSION_IMPL *); +static uint32_t __rec_min_split_page_size(WT_BTREE *, uint32_t); static int __rec_root_write(WT_SESSION_IMPL *, WT_PAGE *, uint32_t); static int __rec_row_int(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_row_leaf(WT_SESSION_IMPL *, @@ -323,7 +331,6 @@ static int __rec_row_leaf_insert( static int __rec_row_merge(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_split_col(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_split_discard(WT_SESSION_IMPL *, WT_PAGE *); -static int __rec_split_fixup(WT_SESSION_IMPL *, WT_RECONCILE *); static int __rec_split_row(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *); static int __rec_split_row_promote( WT_SESSION_IMPL *, WT_RECONCILE *, WT_ITEM *, uint8_t); @@ -968,6 +975,7 @@ __rec_destroy(WT_SESSION_IMPL *session, void *reconcilep) *(WT_RECONCILE **)reconcilep = NULL; __wt_buf_free(session, &r->disk_image); + __wt_scr_free(session, &r->interim_buf); 
__wt_free(session, r->raw_entries); __wt_free(session, r->raw_offsets); @@ -1032,7 +1040,8 @@ __rec_bnd_cleanup(WT_SESSION_IMPL *session, WT_RECONCILE *r, bool destroy) __wt_free(session, bnd->addr.addr); __wt_free(session, bnd->disk_image); __wt_free(session, bnd->supd); - __wt_buf_free(session, &bnd->key); + __wt_buf_free(session, &bnd->max_bnd_key); + __wt_buf_free(session, &bnd->min_bnd_key); } __wt_free(session, r->bnd); r->bnd_next = 0; @@ -1717,6 +1726,17 @@ __rec_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size) r->entries += v; r->space_avail -= size; r->first_free += size; + + /* + * If offset for the minimum split size boundary is not set, we have not + * yet reached the minimum boundary, reduce the space available for it. + */ + if (r->bnd[r->bnd_next].min_bnd_offset == 0) { + if (r->min_space_avail >= size) + r->min_space_avail -= size; + else + r->min_space_avail = 0; + } } /* @@ -1781,16 +1801,22 @@ __rec_dict_replace( return (0); /* - * If the dictionary cell reference is not set, we're creating a new - * entry in the dictionary, update its location. + * If the dictionary offset isn't set, we're creating a new entry in the + * dictionary, set its location. * - * If the dictionary cell reference is set, we have a matching value. - * Create a copy cell instead. + * If the dictionary offset is set, we have a matching value. Create a + * copy cell instead. */ - if (dp->cell == NULL) - dp->cell = r->first_free; + if (dp->offset == 0) + dp->offset = WT_PTRDIFF32(r->first_free, r->disk_image.mem); else { - offset = WT_PTRDIFF(r->first_free, dp->cell); + /* + * The offset is the byte offset from this cell to the previous, + * matching cell, NOT the byte offset from the beginning of the + * page. 
+ */ + offset = (uint64_t)WT_PTRDIFF(r->first_free, + (uint8_t *)r->disk_image.mem + dp->offset); val->len = val->cell_len = __wt_cell_pack_copy(&val->cell, rle, offset); val->buf.data = NULL; @@ -1927,8 +1953,8 @@ static void __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd) { bnd->offset = 0; - bnd->recno = WT_RECNO_OOB; - bnd->entries = 0; + bnd->max_bnd_recno = WT_RECNO_OOB; + bnd->max_bnd_entries = 0; __wt_free(session, bnd->addr.addr); WT_CLEAR(bnd->addr); @@ -1943,6 +1969,10 @@ __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd) bnd->already_compressed = false; + bnd->min_bnd_offset = 0; + bnd->min_bnd_entries = 0; + bnd->min_bnd_recno = WT_RECNO_OOB; + /* * Don't touch the key, we re-use that memory in each new * reconciliation. @@ -1974,39 +2004,63 @@ __rec_split_bnd_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r) } /* - * __wt_split_page_size -- - * Split page size calculation: we don't want to repeatedly split every - * time a new entry is added, so we split to a smaller-than-maximum page size. + * __rec_split_page_size_from_pct -- + * Given a split percentage, calculate split page size in bytes. */ -uint32_t -__wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) -{ +static uint32_t +__rec_split_page_size_from_pct( + int split_pct, uint32_t maxpagesize, uint32_t allocsize) { uintmax_t a; uint32_t split_size; /* * Ideally, the split page size is some percentage of the maximum page - * size rounded to an allocation unit (round to an allocation unit so - * we don't waste space when we write). + * size rounded to an allocation unit (round to an allocation unit so we + * don't waste space when we write). */ a = maxpagesize; /* Don't overflow. */ split_size = (uint32_t)WT_ALIGN_NEAREST( - (a * (u_int)btree->split_pct) / 100, btree->allocsize); + (a * (u_int)split_pct) / 100, allocsize); /* - * Respect the configured split percentage if the calculated split - * size is either zero or a full page. 
The user has either configured - * an allocation size that matches the page size, or a split - * percentage that is close to zero or one hundred. Rounding is going - * to provide a worse outcome than having a split point that doesn't - * fall on an allocation size boundary in those cases. + * Respect the configured split percentage if the calculated split size + * is either zero or a full page. The user has either configured an + * allocation size that matches the page size, or a split percentage + * that is close to zero or one hundred. Rounding is going to provide a + * worse outcome than having a split point that doesn't fall on an + * allocation size boundary in those cases. */ if (split_size == 0 || split_size == maxpagesize) - split_size = (uint32_t)((a * (u_int)btree->split_pct) / 100); + split_size = (uint32_t)((a * (u_int)split_pct) / 100); return (split_size); } +/* + * __wt_split_page_size -- + * Split page size calculation: we don't want to repeatedly split every + * time a new entry is added, so we split to a smaller-than-maximum page size. + */ +uint32_t +__wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) +{ + return (__rec_split_page_size_from_pct( + btree->split_pct, maxpagesize, btree->allocsize)); +} + +/* + * __rec_min_split_page_size -- + * Minimum split size boundary calculation: To track a boundary at the + * minimum split size that we could have split at instead of splitting at + * the split page size. + */ +static uint32_t +__rec_min_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) +{ + return (__rec_split_page_size_from_pct( + WT_BTREE_MIN_SPLIT_PCT, maxpagesize, btree->allocsize)); +} + /* * __rec_split_init -- * Initialization for the reconciliation split functions. 
@@ -2018,7 +2072,7 @@ __rec_split_init(WT_SESSION_IMPL *session, WT_BM *bm; WT_BTREE *btree; WT_PAGE_HEADER *dsk; - size_t corrected_page_size; + size_t corrected_page_size, disk_img_buf_size; btree = S2BT(session); bm = btree->bm; @@ -2053,33 +2107,6 @@ __rec_split_init(WT_SESSION_IMPL *session, r->max_raw_page_size = r->page_size = (uint32_t)WT_MIN(r->page_size * 10, WT_MAX(r->page_size, btree->maxmempage / 2)); - - /* - * Ensure the disk image buffer is large enough for the max object, as - * corrected by the underlying block manager. - */ - corrected_page_size = r->page_size; - WT_RET(bm->write_size(bm, session, &corrected_page_size)); - WT_RET(__wt_buf_init(session, &r->disk_image, corrected_page_size)); - - /* - * Clear the disk page header to ensure all of it is initialized, even - * the unused fields. - * - * In the case of fixed-length column-store, clear the entire buffer: - * fixed-length column-store sets bits in bytes, where the bytes are - * assumed to initially be 0. - */ - memset(r->disk_image.mem, 0, page->type == WT_PAGE_COL_FIX ? - corrected_page_size : WT_PAGE_HEADER_SIZE); - - /* - * Set the page type (the type doesn't change, and setting it later - * would require additional code in a few different places). - */ - dsk = r->disk_image.mem; - dsk->type = page->type; - /* * If we have to split, we want to choose a smaller page size for the * split pages, because otherwise we could end up splitting one large @@ -2099,22 +2126,28 @@ __rec_split_init(WT_SESSION_IMPL *session, * creating overflow items and compacted data, for example, as those * items have already been written to disk). So, the loop calls the * helper functions when approaching a split boundary, and we save the - * information at that point. That allows us to go back and split the - * page at the boundary points if we eventually overflow the maximum - * page size. + * information at that point. We also save the boundary information at + * the minimum split size. 
We maintain two chunks (each boundary + * represents a chunk that gets written as a page) in the memory, + * writing out the older one to the disk as a page when we need to make + * space for a new chunk. On reaching the last chunk, if it turns out to + * be smaller than the minimum split size, we go back into the + * penultimate chunk and split at this minimum split size boundary. This + * moves some data from the penultimate chunk to the last chunk, hence + * increasing the size of the last page written without decreasing the + * penultimate page size beyond the minimum split size. * * Finally, all this doesn't matter for fixed-size column-store pages, * raw compression, and salvage. Fixed-size column store pages can * split under (very) rare circumstances, but they're allocated at a * fixed page size, never anything smaller. In raw compression, the - * underlying compression routine decides when we split, so it's not - * our problem. In salvage, as noted above, we can't split at all. + * underlying compression routine decides when we split, so it's not our + * problem. In salvage, as noted above, we can't split at all. */ if (r->raw_compression || r->salvage != NULL) { r->split_size = 0; r->space_avail = r->page_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - } - else if (page->type == WT_PAGE_COL_FIX) { + } else if (page->type == WT_PAGE_COL_FIX) { r->split_size = r->page_size; r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); @@ -2122,32 +2155,55 @@ __rec_split_init(WT_SESSION_IMPL *session, r->split_size = __wt_split_page_size(btree, r->page_size); r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); + r->min_split_size = + __rec_min_split_page_size(btree, r->page_size); + r->min_space_avail = + r->min_split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); } + + /* + * Ensure the disk image buffer is large enough for the max object, as + * corrected by the underlying block manager. 
+ * + * The buffer in which we build the disk image needs to hold two chunks + * worth of data. Since we want to support a split_size larger than the + * page size (to allow for adjustments based on compression), the buffer + * is twice the greater of split_size and the corrected page size. + */ + corrected_page_size = r->page_size; + WT_RET(bm->write_size(bm, session, &corrected_page_size)); + disk_img_buf_size = 2 * WT_MAX(corrected_page_size, r->split_size); + WT_RET(__wt_buf_init(session, &r->disk_image, disk_img_buf_size)); + + /* + * Clear the disk page header to ensure all of it is initialized, even + * the unused fields. + * + * In the case of fixed-length column-store, clear the entire buffer: + * fixed-length column-store sets bits in bytes, where the bytes are + * assumed to initially be 0. + */ + memset(r->disk_image.mem, 0, page->type == WT_PAGE_COL_FIX ? + disk_img_buf_size : WT_PAGE_HEADER_SIZE); + + /* + * Set the page type (the type doesn't change, and setting it later + * would require additional code in a few different places). + */ + dsk = r->disk_image.mem; + dsk->type = page->type; + r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); /* Initialize the first boundary. */ r->bnd_next = 0; WT_RET(__rec_split_bnd_grow(session, r)); __rec_split_bnd_init(session, &r->bnd[0]); - r->bnd[0].recno = recno; + r->bnd[0].max_bnd_recno = recno; r->bnd[0].offset = WT_PAGE_HEADER_BYTE_SIZE(btree); - /* - * If the maximum page size is the same as the split page size, either - * because of the object type or application configuration, there isn't - * any need to maintain split boundaries within a larger page. - * - * No configuration for salvage here, because salvage can't split. - */ - if (r->raw_compression) - r->bnd_state = SPLIT_TRACKING_RAW; - else if (max == r->split_size) - r->bnd_state = SPLIT_TRACKING_OFF; - else - r->bnd_state = SPLIT_BOUNDARY; - - /* Initialize the entry counters. */ - r->entries = r->total_entries = 0; + /* Initialize the entry counter.
*/ + r->entries = 0; /* Initialize the starting record number. */ r->recno = recno; @@ -2350,19 +2406,112 @@ __rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len) { WT_BM *bm; WT_BTREE *btree; - size_t corrected_page_size, len; + size_t corrected_page_size, inuse, len; btree = S2BT(session); bm = btree->bm; len = WT_PTRDIFF(r->first_free, r->disk_image.mem); - corrected_page_size = len + add_len; + inuse = (len - r->bnd[r->bnd_next].offset) + + WT_PAGE_HEADER_BYTE_SIZE(btree); + corrected_page_size = inuse + add_len; + WT_RET(bm->write_size(bm, session, &corrected_page_size)); - WT_RET(__wt_buf_grow(session, &r->disk_image, corrected_page_size)); + /* Need to account for buffer carrying two chunks worth of data */ + WT_RET(__wt_buf_grow(session, &r->disk_image, 2 * corrected_page_size)); + r->first_free = (uint8_t *)r->disk_image.mem + len; - WT_ASSERT(session, corrected_page_size >= len); - r->space_avail = corrected_page_size - len; + WT_ASSERT(session, corrected_page_size >= inuse); + r->space_avail = corrected_page_size - inuse; WT_ASSERT(session, r->space_avail >= add_len); + + return (0); +} + +/* + * __rec_split_write_prev_and_shift_cur -- + * Write the previous split chunk to the disk as a page. Shift the contents + * of the current chunk to the start of the buffer, making space for a new + * chunk to be written. + * If the caller asks for a chunk resizing, the boundary between the two + * chunks is readjusted to the minimum split size boundary details stored + * in the previous chunk, letting the current chunk grow at the cost of the + * previous chunk. 
+ */ +static int +__rec_split_write_prev_and_shift_cur( + WT_SESSION_IMPL *session, WT_RECONCILE *r, bool resize_chunks) +{ + WT_BM *bm; + WT_BOUNDARY *bnd_cur, *bnd_prev; + WT_BTREE *btree; + WT_PAGE_HEADER *dsk, *dsk_tmp; + size_t cur_len, len; + uint8_t *dsk_start; + + WT_ASSERT(session, r->bnd_next != 0); + + btree = S2BT(session); + bm = btree->bm; + bnd_cur = &r->bnd[r->bnd_next]; + bnd_prev = bnd_cur - 1; + dsk = r->disk_image.mem; + cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset; + + /* + * Resize chunks if the current is smaller than the minimum, and there + * are details on the minimum split size boundary available in the + * previous boundary details. + * + * There is a possibility that we do not have a minimum boundary set, in + * such a case we skip chunk resizing. Such a condition is possible for + * instance when we are building the image in the buffer and the first + * K/V pair is large enough that it surpasses both the minimum split + * size and the split size the application has set. In such a case we + * split the chunk without saving any minimum boundary. + */ + if (resize_chunks && + cur_len < r->min_split_size && bnd_prev->min_bnd_offset != 0) { + bnd_cur->offset = bnd_prev->min_bnd_offset; + bnd_cur->max_bnd_entries += + bnd_prev->max_bnd_entries - bnd_prev->min_bnd_entries; + bnd_prev->max_bnd_entries = bnd_prev->min_bnd_entries; + bnd_cur->max_bnd_recno = bnd_prev->min_bnd_recno; + + WT_RET(__wt_buf_set(session, &bnd_cur->max_bnd_key, + bnd_prev->min_bnd_key.data, bnd_prev->min_bnd_key.size)); + + /* Update current chunk's length */ + cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset; + } + + /* + * Create an interim buffer if not already done to prepare the previous + * chunk's disk image. 
+ */ + len = bnd_cur->offset; + WT_RET(bm->write_size(bm, session, &len)); + if (r->interim_buf == NULL) + WT_RET(__wt_scr_alloc(session, len, &r->interim_buf)); + else + WT_RET(__wt_buf_init(session, r->interim_buf, len)); + + dsk_tmp = r->interim_buf->mem; + memcpy(dsk_tmp, dsk, bnd_cur->offset); + dsk_tmp->recno = bnd_prev->max_bnd_recno; + dsk_tmp->u.entries = bnd_prev->max_bnd_entries; + dsk_tmp->mem_size = WT_STORE_SIZE(bnd_cur->offset); + r->interim_buf->size = dsk_tmp->mem_size; + WT_RET(__rec_split_write(session, r, bnd_prev, r->interim_buf, false)); + + /* Shift the current chunk to the start of the buffer */ + dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); + (void)memmove(dsk_start, (uint8_t *)dsk + bnd_cur->offset, cur_len); + + /* Fix boundary offset */ + bnd_cur->offset = WT_PAGE_HEADER_BYTE_SIZE(btree); + /* Fix where free points */ + r->first_free = dsk_start + cur_len; return (0); } @@ -2382,6 +2531,9 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) btree = S2BT(session); dsk = r->disk_image.mem; + /* Fixed length col store can call with next_len 0 */ + WT_ASSERT(session, next_len == 0 || r->space_avail < next_len); + /* * We should never split during salvage, and we're about to drop core * because there's no parent page. @@ -2391,147 +2543,63 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) "%s page too large, attempted split during salvage", __wt_page_type_string(r->page->type)); - /* Hitting a page boundary resets the dictionary, in all cases. */ - __rec_dictionary_reset(r); - - inuse = WT_PTRDIFF(r->first_free, dsk); - switch (r->bnd_state) { - case SPLIT_BOUNDARY: - /* - * We can get here if the first key/value pair won't fit. - * Additionally, grow the buffer to contain the current item if - * we haven't already consumed a reasonable portion of a split - * chunk. 
- */ - if (inuse < r->split_size / 2) - break; - - /* - * About to cross a split boundary but not yet forced to split - * into multiple pages. If we have to split, this is one of the - * split points, save information about where we are when the - * split would have happened. - */ - WT_RET(__rec_split_bnd_grow(session, r)); - last = &r->bnd[r->bnd_next++]; - next = last + 1; - - /* Set the number of entries for the just finished chunk. */ - last->entries = r->entries - r->total_entries; - r->total_entries = r->entries; - - /* Set the key for the next chunk. */ - next->recno = r->recno; - if (dsk->type == WT_PAGE_ROW_INT || - dsk->type == WT_PAGE_ROW_LEAF) - WT_RET(__rec_split_row_promote( - session, r, &next->key, dsk->type)); - - /* - * Set the starting buffer offset and clear the entries (the - * latter not required, but cleaner). - */ - next->offset = WT_PTRDIFF(r->first_free, dsk); - next->entries = 0; - - /* Set the space available to another split-size chunk. */ - r->space_avail = - r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - - /* - * Adjust the space available to handle two cases: - * - We don't have enough room for another full split-size - * chunk on the page. - * - We chose to fill past a page boundary because of a - * large item. - */ - if (inuse + r->space_avail > r->page_size) { - r->space_avail = - r->page_size > inuse ? (r->page_size - inuse) : 0; - - /* There are no further boundary points. */ - r->bnd_state = SPLIT_MAX; - } - - /* - * Return if the next object fits into this page, else we have - * to split the page. - */ - if (r->space_avail >= next_len) - return (0); - - /* FALLTHROUGH */ - case SPLIT_MAX: - /* - * We're going to have to split and create multiple pages. - * - * Cycle through the saved split-point information, writing the - * split chunks we have tracked. The underlying fixup function - * sets the space available and other information, and copied - * any unwritten chunk of data to the beginning of the buffer. 
- */ - WT_RET(__rec_split_fixup(session, r)); - - /* We're done saving split chunks. */ - r->bnd_state = SPLIT_TRACKING_OFF; - break; - case SPLIT_TRACKING_OFF: - /* - * We can get here if the first key/value pair won't fit. - * Additionally, grow the buffer to contain the current item if - * we haven't already consumed a reasonable portion of a split - * chunk. - */ - if (inuse < r->split_size / 2) - break; + last = &r->bnd[r->bnd_next]; + inuse = (WT_PTRDIFF(r->first_free, dsk) - last->offset) + + WT_PAGE_HEADER_BYTE_SIZE(btree); - /* - * The key/value pairs didn't fit into a single page, but either - * we've already noticed that and are now processing the rest of - * the pairs at split size boundaries, or the split size was the - * same as the page size, and we never bothered with split point - * information at all. - */ - WT_RET(__rec_split_bnd_grow(session, r)); - last = &r->bnd[r->bnd_next++]; - next = last + 1; + /* + * We can get here if the first key/value pair won't fit. + * Additionally, grow the buffer to contain the current item if we + * haven't already consumed a reasonable portion of a split chunk. + */ + if (inuse < r->split_size / 2) + goto done; - /* - * Set the key for the next chunk (before writing the block, a - * key range is needed in that code). - */ - next->recno = r->recno; - if (dsk->type == WT_PAGE_ROW_INT || - dsk->type == WT_PAGE_ROW_LEAF) - WT_RET(__rec_split_row_promote( - session, r, &next->key, dsk->type)); + /* All page boundaries reset the dictionary. */ + __rec_dictionary_reset(r); - /* Clear the entries (not required, but cleaner). */ - next->entries = 0; + /* Set the number of entries for the just finished chunk. */ + last->max_bnd_entries = r->entries; - /* Finalize the header information and write the page. */ - dsk->recno = last->recno; - dsk->u.entries = r->entries; - dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); + /* + * In case of bulk load, write out chunks as we get them. 
Otherwise we + * keep two chunks in memory at a given time. So, if there is a previous + * chunk, write it out, making space in the buffer for the next chunk to + * be written. + */ + if (r->is_bulk_load) { + dsk->recno = last->max_bnd_recno; + dsk->u.entries = last->max_bnd_entries; + dsk->mem_size = (uint32_t)inuse; r->disk_image.size = dsk->mem_size; - WT_RET( - __rec_split_write(session, r, last, &r->disk_image, false)); - - /* - * Set the caller's entry count and buffer information for the - * next chunk. We only get here if we're not splitting or have - * already split, so it's split-size chunks from here on out. - */ - r->entries = 0; + WT_RET(__rec_split_write( + session, r, last, &r->disk_image, false)); + /* Fix where free points */ r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); - r->space_avail = - r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - break; - case SPLIT_TRACKING_RAW: - return (__wt_illegal_value(session, NULL)); - } + } else if (r->bnd_next != 0) + WT_RET(__rec_split_write_prev_and_shift_cur(session, r, false)); - /* + /* Prepare the next boundary */ + WT_RET(__rec_split_bnd_grow(session, r)); + r->bnd_next++; + next = &r->bnd[r->bnd_next]; + next->offset = WT_PTRDIFF(r->first_free, dsk); + /* Set the key for the next chunk. */ + next->max_bnd_recno = r->recno; + if (dsk->type == WT_PAGE_ROW_INT || dsk->type == WT_PAGE_ROW_LEAF) + WT_RET(__rec_split_row_promote( + session, r, &next->max_bnd_key, dsk->type)); + + r->entries = 0; + /* + * Set the space available to another split-size and minimum split-size + * chunk. + */ + r->space_avail = r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); + r->min_space_avail = + r->min_split_size - WT_PAGE_HEADER_BYTE_SIZE(btree); + +done: /* * Overflow values can be larger than the maximum page size but still be * "on-page". 
If the next key/value pair is larger than space available * after a split has happened (in other words, larger than the maximum @@ -2548,6 +2616,64 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) return (0); } +/* + * __rec_split_crossing_bnd -- + * Save the details for the minimum split size boundary or call for a + * split. + */ +static inline int +__rec_split_crossing_bnd( + WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) +{ + WT_BOUNDARY *bnd; + WT_BTREE *btree; + WT_PAGE_HEADER *dsk; + size_t min_bnd_offset; + + WT_ASSERT(session, WT_CHECK_CROSSING_BND(r, next_len)); + + /* + * If crossing the minimum split size boundary, store the boundary + * details at the current location in the buffer. If we are crossing the + * split boundary at the same time, possible when the next record is + * large enough, just split at this point. + */ + if (WT_CROSSING_MIN_BND(r, next_len) && + !WT_CROSSING_SPLIT_BND(r, next_len)) { + btree = S2BT(session); + bnd = &r->bnd[r->bnd_next]; + dsk = r->disk_image.mem; + min_bnd_offset = (WT_PTRDIFF(r->first_free, dsk) - + bnd->offset) + WT_PAGE_HEADER_BYTE_SIZE(btree); + if (min_bnd_offset == WT_PAGE_HEADER_BYTE_SIZE(btree)) + /* + * This is possible if the first record doesn't fit in + * the minimum split size, we write this record without + * setting up any boundary here. We will get the + * opportunity to setup a boundary before writing out + * the next record. + */ + return (0); + + WT_ASSERT(session, bnd->min_bnd_offset == 0); + + /* All page boundaries reset the dictionary. 
*/ + __rec_dictionary_reset(r); + + bnd->min_bnd_offset = min_bnd_offset; + bnd->min_bnd_entries = r->entries; + bnd->min_bnd_recno = r->recno; + if (dsk->type == WT_PAGE_ROW_INT || + dsk->type == WT_PAGE_ROW_LEAF) + WT_RET(__rec_split_row_promote( + session, r, &bnd->min_bnd_key, dsk->type)); + return (0); + } + + /* We are crossing a split boundary */ + return (__rec_split(session, r, next_len)); +} + /* * __rec_split_raw_worker -- * Handle the raw compression page reconciliation bookkeeping. @@ -2626,7 +2752,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, */ recno = WT_RECNO_OOB; if (dsk->type == WT_PAGE_COL_VAR) - recno = last->recno; + recno = last->max_bnd_recno; entry = max_image_slot = slots = 0; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { @@ -2853,7 +2979,7 @@ no_slots: */ dst->size = result_len + WT_BLOCK_COMPRESS_SKIP; dsk_dst = dst->mem; - dsk_dst->recno = last->recno; + dsk_dst->recno = last->max_bnd_recno; dsk_dst->mem_size = r->raw_offsets[result_slots] + WT_BLOCK_COMPRESS_SKIP; dsk_dst->u.entries = r->raw_entries[result_slots - 1]; @@ -2873,7 +2999,7 @@ no_slots: WT_RET(__wt_strndup(session, dsk, dsk_dst->mem_size, &last->disk_image)); disk_image = last->disk_image; - disk_image->recno = last->recno; + disk_image->recno = last->max_bnd_recno; disk_image->mem_size = dsk_dst->mem_size; disk_image->u.entries = dsk_dst->u.entries; } @@ -2903,14 +3029,14 @@ no_slots: */ switch (dsk->type) { case WT_PAGE_COL_INT: - next->recno = r->raw_recnos[result_slots]; + next->max_bnd_recno = r->raw_recnos[result_slots]; break; case WT_PAGE_COL_VAR: - next->recno = r->raw_recnos[result_slots - 1]; + next->max_bnd_recno = r->raw_recnos[result_slots - 1]; break; case WT_PAGE_ROW_INT: case WT_PAGE_ROW_LEAF: - next->recno = WT_RECNO_OOB; + next->max_bnd_recno = WT_RECNO_OOB; if (!last_block) { /* * Confirm there was uncompressed data remaining @@ -2919,7 +3045,7 @@ no_slots: */ WT_ASSERT(session, len > 0); WT_RET(__rec_split_row_promote_cell( - session, dsk, 
&next->key)); + session, dsk, &next->max_bnd_key)); } break; } @@ -2931,7 +3057,7 @@ no_slots: */ WT_STAT_DATA_INCR(session, compress_raw_fail); - dsk->recno = last->recno; + dsk->recno = last->max_bnd_recno; dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); dsk->u.entries = r->entries; r->disk_image.size = dsk->mem_size; @@ -3008,35 +3134,9 @@ __rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) static int __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) { - WT_BOUNDARY *bnd; + WT_BOUNDARY *bnd_cur, *bnd_prev; WT_PAGE_HEADER *dsk; - - /* Adjust the boundary information based on our split status. */ - switch (r->bnd_state) { - case SPLIT_BOUNDARY: - case SPLIT_MAX: - /* - * We never split, the reconciled page fit into a maximum page - * size. Change the first boundary slot to represent the full - * page (the first boundary slot is largely correct, just update - * the number of entries). - */ - r->bnd_next = 0; - break; - case SPLIT_TRACKING_OFF: - /* - * If we have already split, or aren't tracking boundaries, put - * the remaining data in the next boundary slot. - */ - WT_RET(__rec_split_bnd_grow(session, r)); - break; - case SPLIT_TRACKING_RAW: - /* - * We were configured for raw compression, and either we never - * wrote anything, or there's a remaindered block of data. - */ - break; - } + bool grow_bnd; /* * We may arrive here with no entries to write if the page was entirely @@ -3063,20 +3163,66 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) return (EBUSY); } - /* Set the boundary reference and increment the count. */ - bnd = &r->bnd[r->bnd_next++]; - bnd->entries = r->entries; - - /* Finalize the header information. */ dsk = r->disk_image.mem; - dsk->recno = bnd->recno; - dsk->u.entries = r->entries; + + /* Set the number of entries for the just finished chunk. 
*/ + bnd_cur = &r->bnd[r->bnd_next]; + bnd_cur->max_bnd_entries = r->entries; + + grow_bnd = true; + /* + * We can reach here even with raw_compression when the last split chunk + * is too small to be sent for raw compression. + */ + if (!r->is_bulk_load && !r->raw_compression) { + if (WT_PTRDIFF(r->first_free, dsk) > r->page_size && + r->bnd_next != 0) { + /* + * We hold two boundaries worth of data in the buffer, + * and this data doesn't fit in a single page. If the + * last chunk is too small, readjust the boundary to a + * pre-computed minimum. + * Write out the penultimate chunk to the disk as a page + */ + WT_RET(__rec_split_write_prev_and_shift_cur( + session, r, true)); + } else + if (r->bnd_next != 0) { + /* + * We have two boundaries, but the data in the + * buffer can fit a single page. Merge the + * boundaries to create a single chunk. + */ + bnd_prev = bnd_cur - 1; + bnd_prev->max_bnd_entries += + bnd_cur->max_bnd_entries; + r->bnd_next--; + grow_bnd = false; + } + } + + /* + * We already have space for an extra boundary if we merged two + * boundaries above, in that case we do not need to grow the boundary + * structure. + */ + if (grow_bnd) + WT_RET(__rec_split_bnd_grow(session, r)); + bnd_cur = &r->bnd[r->bnd_next]; + r->bnd_next++; + + /* + * Current boundary now has all the remaining data/last page now. + * Let's write it to the disk + */ + dsk->recno = bnd_cur->max_bnd_recno; + dsk->u.entries = bnd_cur->max_bnd_entries; dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); r->disk_image.size = dsk->mem_size; /* If this is a checkpoint, we're done, otherwise write the page. */ - return (__rec_is_checkpoint(session, r, bnd) ? - 0 : __rec_split_write(session, r, bnd, &r->disk_image, true)); + return (__rec_is_checkpoint(session, r, bnd_cur) ? 
+ 0 : __rec_split_write(session, r, bnd_cur, &r->disk_image, true)); } /* @@ -3109,98 +3255,6 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) return (__rec_split_finish_std(session, r)); } -/* - * __rec_split_fixup -- - * Fix up after crossing the maximum page boundary. - */ -static int -__rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r) -{ - WT_BOUNDARY *bnd; - WT_BTREE *btree; - WT_DECL_ITEM(tmp); - WT_DECL_RET; - WT_PAGE_HEADER *dsk; - size_t i, len; - uint8_t *dsk_start, *p; - - /* - * When we overflow physical limits of the page, we walk the list of - * split chunks we've created and write those pages out, then update - * the caller's information. - */ - btree = S2BT(session); - - /* - * The data isn't laid out on a page boundary or nul padded; copy it to - * a clean, aligned, padded buffer before writing it. - * - * Allocate a scratch buffer to hold the new disk image. Copy the disk - * page's header and block-manager space into the scratch buffer, most - * of the header information remains unchanged between the pages. - */ - WT_RET(__wt_scr_alloc(session, r->disk_image.memsize, &tmp)); - dsk = tmp->mem; - memcpy(dsk, r->disk_image.mem, WT_PAGE_HEADER_BYTE_SIZE(btree)); - - /* - * For each split chunk we've created, update the disk image and copy - * it into place. - */ - dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); - for (i = 0, bnd = r->bnd; i < r->bnd_next; ++i, ++bnd) { - /* Copy the page contents to the temporary buffer. */ - len = (bnd + 1)->offset - bnd->offset; - memcpy(dsk_start, - (uint8_t *)r->disk_image.mem + bnd->offset, len); - - /* Finalize the header information and write the page. 
*/ - dsk->recno = bnd->recno; - dsk->u.entries = bnd->entries; - tmp->size = WT_PAGE_HEADER_BYTE_SIZE(btree) + len; - dsk->mem_size = WT_STORE_SIZE(tmp->size); - WT_ERR(__rec_split_write(session, r, bnd, tmp, false)); - } - - /* - * There is probably a remnant in the working buffer that didn't get - * written, copy it down to the beginning of the working buffer. - * - * Confirm the remnant is no larger than a split-sized chunk, including - * header. We know that's the maximum sized remnant because we only have - * remnants if split switches from accumulating to a split boundary to - * accumulating to the end of the page (the other path here is when we - * hit a split boundary, there was room for another split chunk in the - * page, and the next item still wouldn't fit, in which case there is no - * remnant). So: we were accumulating to the end of the page and created - * a remnant. We know the remnant cannot be as large as a split-sized - * chunk, including header, because if there was room for that large a - * remnant, we wouldn't have switched from accumulating to a page end. - */ - p = (uint8_t *)r->disk_image.mem + bnd->offset; - len = WT_PTRDIFF(r->first_free, p); - if (len >= r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree)) - WT_PANIC_ERR(session, EINVAL, - "Reconciliation remnant too large for the split buffer"); - dsk = r->disk_image.mem; - dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); - (void)memmove(dsk_start, p, len); - - /* - * Fix up our caller's information, including updating the starting - * record number. - */ - r->entries -= r->total_entries; - r->first_free = dsk_start + len; - WT_ASSERT(session, - r->page_size >= (WT_PAGE_HEADER_BYTE_SIZE(btree) + len)); - r->space_avail = - r->split_size - (WT_PAGE_HEADER_BYTE_SIZE(btree) + len); - -err: __wt_scr_free(session, &tmp); - return (ret); -} - /* * __rec_split_write -- * Write a disk block out for the split helper functions. 
@@ -3222,11 +3276,17 @@ __rec_split_write(WT_SESSION_IMPL *session, int cmp; uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE]; bool need_image; +#ifdef HAVE_DIAGNOSTIC + bool verify_image; +#endif btree = S2BT(session); dsk = buf->mem; page = r->page; mod = page->modify; +#ifdef HAVE_DIAGNOSTIC + verify_image = true; +#endif /* Set the zero-length value flag in the page header. */ if (dsk->type == WT_PAGE_ROW_LEAF) { @@ -3238,8 +3298,6 @@ __rec_split_write(WT_SESSION_IMPL *session, F_SET(dsk, WT_PAGE_EMPTY_V_NONE); } - bnd->entries = r->entries; - /* Initialize the address (set the page type for the parent). */ switch (dsk->type) { case WT_PAGE_COL_FIX: @@ -3285,7 +3343,8 @@ __rec_split_write(WT_SESSION_IMPL *session, switch (page->type) { case WT_PAGE_COL_FIX: case WT_PAGE_COL_VAR: - if (WT_INSERT_RECNO(supd->ins) >= (bnd + 1)->recno) + if (WT_INSERT_RECNO(supd->ins) >= + (bnd + 1)->max_bnd_recno) goto supd_check_complete; break; case WT_PAGE_ROW_LEAF: @@ -3296,8 +3355,8 @@ __rec_split_write(WT_SESSION_IMPL *session, key->data = WT_INSERT_KEY(supd->ins); key->size = WT_INSERT_KEY_SIZE(supd->ins); } - WT_ERR(__wt_compare(session, - btree->collator, key, &(bnd + 1)->key, &cmp)); + WT_ERR(__wt_compare(session, btree->collator, + key, &(bnd + 1)->max_bnd_key, &cmp)); if (cmp >= 0) goto supd_check_complete; break; @@ -3387,18 +3446,21 @@ supd_check_complete: #ifdef HAVE_VERBOSE /* Output a verbose message if we create a page without many entries */ - if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && r->entries < 6) + if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && + bnd->max_bnd_entries < 6) __wt_verbose(session, WT_VERB_SPLIT, "Reconciliation creating a page with %" PRIu32 " entries, memory footprint %" WT_SIZET_FMT - ", page count %" PRIu32 ", %s, split state: %d", - r->entries, r->page->memory_footprint, r->bnd_next, - F_ISSET(r, WT_EVICTING) ? 
"evict" : "checkpoint", - r->bnd_state); + ", page count %" PRIu32 ", %s", bnd->max_bnd_entries, + r->page->memory_footprint, r->bnd_next, + F_ISSET(r, WT_EVICTING) ? "evict" : "checkpoint"); #endif WT_ERR(__wt_bt_write(session, buf, addr, &addr_size, false, F_ISSET(r, WT_CHECKPOINTING), bnd->already_compressed)); +#ifdef HAVE_DIAGNOSTIC + verify_image = false; +#endif WT_ERR(__wt_strndup(session, addr, addr_size, &bnd->addr.addr)); bnd->addr.size = (uint8_t)addr_size; @@ -3425,9 +3487,20 @@ copy_image: */ need_image = F_ISSET(r, WT_EVICT_SCRUB) || (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL); - if (need_image && bnd->disk_image == NULL) + if (need_image && bnd->disk_image == NULL) { +#ifdef HAVE_DIAGNOSTIC + /* + * The I/O routines verify all disk images we write, but there + * are paths in reconciliation that don't do I/O. Verify those + * images, too. + */ + WT_ASSERT(session, verify_image == false || + __wt_verify_dsk_image( + session, "[reconcile-image]", buf->data, 0, true) == 0); +#endif WT_ERR(__wt_strndup( session, buf->data, buf->size, &bnd->disk_image)); + } if (!need_image) __wt_free(session, bnd->disk_image); @@ -3680,11 +3753,12 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) cursor->value.data, cursor->value.size, (uint64_t)0)); /* Boundary: split or write the page. 
*/ - if (key->len + val->len > r->space_avail) { - if (r->raw_compression) - WT_RET( - __rec_split_raw(session, r, key->len + val->len)); - else { + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) + WT_RET(__rec_split_raw( + session, r, key->len + val->len)); + } else + if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) { /* * Turn off prefix compression until a full key written * to the new page, and (unless already working with an @@ -3696,10 +3770,9 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_RET(__rec_cell_build_leaf_key( session, r, NULL, 0, &ovfl_key)); } - - WT_RET(__rec_split(session, r, key->len + val->len)); + WT_RET(__rec_split_crossing_bnd( + session, r, key->len + val->len)); } - } /* Copy the key/value pair onto the page. */ __rec_copy_incr(session, r, key); @@ -3740,6 +3813,10 @@ __rec_col_fix_bulk_insert_split_check(WT_CURSOR_BULK *cbulk) * split. * * Boundary: split or write the page. + * + * No need to have a minimum split size boundary, all + * pages are filled 100% except the last, allowing it to + * grow in the future. */ __rec_incr(session, r, cbulk->entry, __bitstr_size( @@ -3844,10 +3921,12 @@ __wt_bulk_insert_var( r, cbulk->last.data, cbulk->last.size, cbulk->rle)); /* Boundary: split or write the page. */ - if (val->len > r->space_avail) - WT_RET(r->raw_compression ? - __rec_split_raw(session, r, val->len) : - __rec_split(session, r, val->len)); + if (r->raw_compression) { + if (val->len > r->space_avail) + WT_RET(__rec_split_raw(session, r, val->len)); + } else + if (WT_CROSSING_SPLIT_BND(r, val->len)) + WT_RET(__rec_split_crossing_bnd(session, r, val->len)); /* Copy the value onto the page. */ if (btree->dictionary) @@ -3983,10 +4062,13 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) WT_CHILD_RELEASE_ERR(session, hazard, ref); /* Boundary: split or write the page. */ - if (val->len > r->space_avail) - WT_ERR(r->raw_compression ? 
- __rec_split_raw(session, r, val->len) : - __rec_split(session, r, val->len)); + if (r->raw_compression) { + if (val->len > r->space_avail) + WT_ERR(__rec_split_raw(session, r, val->len)); + } else + if (WT_CHECK_CROSSING_BND(r, val->len)) + WT_ERR(__rec_split_crossing_bnd( + session, r, val->len)); /* Copy the value onto the page. */ __rec_copy_incr(session, r, val); @@ -4028,10 +4110,13 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) addr->addr, addr->size, __rec_vtype(addr), r->recno); /* Boundary: split or write the page. */ - if (val->len > r->space_avail) - WT_RET(r->raw_compression ? - __rec_split_raw(session, r, val->len) : - __rec_split(session, r, val->len)); + if (r->raw_compression) { + if (val->len > r->space_avail) + WT_RET(__rec_split_raw(session, r, val->len)); + } else + if (WT_CHECK_CROSSING_BND(r, val->len)) + WT_RET(__rec_split_crossing_bnd( + session, r, val->len)); /* Copy the value onto the page. */ __rec_copy_incr(session, r, val); @@ -4139,6 +4224,10 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) * split. * * Boundary: split or write the page. + * + * No need to have a minimum split size boundary, all + * pages are filled 100% except the last, allowing it to + * grow in the future. */ __rec_incr(session, r, entry, __bitstr_size((size_t)entry * btree->bitcnt)); @@ -4295,10 +4384,13 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, session, r, value->data, value->size, rle)); /* Boundary: split or write the page. */ - if (val->len > r->space_avail) - WT_RET(r->raw_compression ? - __rec_split_raw(session, r, val->len) : - __rec_split(session, r, val->len)); + if (r->raw_compression) { + if (val->len > r->space_avail) + WT_RET(__rec_split_raw(session, r, val->len)); + } else + if (WT_CHECK_CROSSING_BND(r, val->len)) + WT_RET(__rec_split_crossing_bnd( + session, r, val->len)); /* Copy the value onto the page. 
*/ if (!deleted && !overflow_type && btree->dictionary) @@ -4961,11 +5053,12 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) r->cell_zero = false; /* Boundary: split or write the page. */ - if (key->len + val->len > r->space_avail) { - if (r->raw_compression) + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) WT_ERR(__rec_split_raw( session, r, key->len + val->len)); - else { + } else + if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) { /* * In one path above, we copied address blocks * from the page rather than building the actual @@ -4977,10 +5070,10 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_IKEY_DATA(ikey), ikey->size)); key_onpage_ovfl = false; } - WT_ERR(__rec_split( + + WT_ERR(__rec_split_crossing_bnd( session, r, key->len + val->len)); } - } /* Copy the key and value onto the page. */ __rec_copy_incr(session, r, key); @@ -5030,10 +5123,14 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) addr->addr, addr->size, __rec_vtype(addr), WT_RECNO_OOB); /* Boundary: split or write the page. */ - if (key->len + val->len > r->space_avail) - WT_RET(r->raw_compression ? - __rec_split_raw(session, r, key->len + val->len) : - __rec_split(session, r, key->len + val->len)); + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) + WT_RET(__rec_split_raw( + session, r, key->len + val->len)); + } else + if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) + WT_RET(__rec_split_crossing_bnd( + session, r, key->len + val->len)); /* Copy the key and value onto the page. */ __rec_copy_incr(session, r, key); @@ -5362,16 +5459,17 @@ build: } /* Boundary: split or write the page. 
*/ - if (key->len + val->len > r->space_avail) { - if (r->raw_compression) + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) WT_ERR(__rec_split_raw( session, r, key->len + val->len)); - else { + } else + if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) { /* - * In one path above, we copied address blocks - * from the page rather than building the actual - * key. In that case, we have to build the key - * now because we are about to promote it. + * If we copied address blocks from the page + * rather than building the actual key, we have + * to build the key now because we are about to + * promote it. */ if (key_onpage_ovfl) { WT_ERR(__wt_dsk_cell_data_ref(session, @@ -5390,14 +5488,13 @@ build: if (!ovfl_key) WT_ERR( __rec_cell_build_leaf_key( - session, - r, NULL, 0, &ovfl_key)); + session, r, NULL, 0, + &ovfl_key)); } - WT_ERR(__rec_split( + WT_ERR(__rec_split_crossing_bnd( session, r, key->len + val->len)); } - } /* Copy the key/value pair onto the page. */ __rec_copy_incr(session, r, key); @@ -5460,11 +5557,12 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins), &ovfl_key)); /* Boundary: split or write the page. */ - if (key->len + val->len > r->space_avail) { - if (r->raw_compression) + if (r->raw_compression) { + if (key->len + val->len > r->space_avail) WT_RET(__rec_split_raw( session, r, key->len + val->len)); - else { + } else + if (WT_CHECK_CROSSING_BND(r, key->len + val->len)) { /* * Turn off prefix compression until a full key * written to the new page, and (unless already @@ -5476,14 +5574,13 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) if (!ovfl_key) WT_RET( __rec_cell_build_leaf_key( - session, - r, NULL, 0, &ovfl_key)); + session, r, NULL, 0, + &ovfl_key)); } - WT_RET(__rec_split( + WT_RET(__rec_split_crossing_bnd( session, r, key->len + val->len)); } - } /* Copy the key/value pair onto the page. 
*/ __rec_copy_incr(session, r, key); @@ -5595,13 +5692,14 @@ __rec_split_dump_keys(WT_SESSION_IMPL *session, WT_PAGE *page, WT_RECONCILE *r) __wt_verbose(session, WT_VERB_SPLIT, "starting key %s", __wt_buf_set_printable( - session, bnd->key.data, bnd->key.size, tkey)); + session, bnd->max_bnd_key.data, + bnd->max_bnd_key.size, tkey)); break; case WT_PAGE_COL_FIX: case WT_PAGE_COL_INT: case WT_PAGE_COL_VAR: __wt_verbose(session, WT_VERB_SPLIT, - "starting recno %" PRIu64, bnd->recno); + "starting recno %" PRIu64, bnd->max_bnd_recno); break; WT_ILLEGAL_VALUE_ERR(session); } @@ -5863,10 +5961,10 @@ __rec_split_row(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) /* We never set the first page's key, grab it from the original page. */ ref = r->ref; if (__wt_ref_is_root(ref)) - WT_RET(__wt_buf_set(session, &r->bnd[0].key, "", 1)); + WT_RET(__wt_buf_set(session, &r->bnd[0].max_bnd_key, "", 1)); else { __wt_ref_key(ref->home, ref, &p, &size); - WT_RET(__wt_buf_set(session, &r->bnd[0].key, p, size)); + WT_RET(__wt_buf_set(session, &r->bnd[0].max_bnd_key, p, size)); } /* Allocate, then initialize the array of replacement blocks. */ @@ -5874,8 +5972,8 @@ __rec_split_row(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) for (multi = mod->mod_multi, bnd = r->bnd, i = 0; i < r->bnd_next; ++multi, ++bnd, ++i) { - WT_RET(__wt_row_ikey_alloc(session, 0, - bnd->key.data, bnd->key.size, &multi->key.ikey)); + WT_RET(__wt_row_ikey_alloc(session, 0, bnd->max_bnd_key.data, + bnd->max_bnd_key.size, &multi->key.ikey)); /* * Copy any disk image. Don't take saved updates without a @@ -5922,7 +6020,7 @@ __rec_split_col(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) for (multi = mod->mod_multi, bnd = r->bnd, i = 0; i < r->bnd_next; ++multi, ++bnd, ++i) { - multi->key.recno = bnd->recno; + multi->key.recno = bnd->max_bnd_recno; /* * Copy any disk image. 
Don't take saved updates without a @@ -6399,7 +6497,8 @@ __rec_dictionary_lookup( for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash); dp != NULL && dp->hash == hash; dp = dp->next[0]) { WT_RET(__wt_cell_pack_data_match( - dp->cell, &val->cell, val->buf.data, &match)); + (WT_CELL *)((uint8_t *)r->disk_image.mem + dp->offset), + &val->cell, val->buf.data, &match)); if (match) { WT_STAT_DATA_INCR(session, rec_dictionary); *dpp = dp; @@ -6425,7 +6524,7 @@ __rec_dictionary_lookup( * know where on the page it will be written). */ next = r->dictionary[r->dictionary_next++]; - next->cell = NULL; /* Not necessary, just cautious. */ + next->offset = 0; /* Not necessary, just cautious. */ next->hash = hash; __rec_dictionary_skip_insert(r->dictionary_head, next, hash); *dpp = next; diff --git a/test/format/config.h b/test/format/config.h index e3e1e73a786..b5feb7a5321 100644 --- a/test/format/config.h +++ b/test/format/config.h @@ -284,7 +284,7 @@ static CONFIG c[] = { { "split_pct", "page split size as a percentage of the maximum page size", - 0x0, 40, 85, 85, &g.c_split_pct, NULL }, + 0x0, 50, 100, 100, &g.c_split_pct, NULL }, { "statistics", "maintain statistics", /* 20% */ -- cgit v1.2.1 From d2dd272da04d8ca33f23eac11de953e3c16f9a95 Mon Sep 17 00:00:00 2001 From: "Alexandra (Sasha) Fedorova" Date: Thu, 30 Mar 2017 14:38:30 -0700 Subject: WT-3190 perform a complete re-tune of eviction workers every 30 seconds. (#3324) Otherwise the number of workers wouldn't adjust when the workload changed. 
--- dist/stat_data.py | 1 + src/evict/evict_lru.c | 83 ++++++++--- src/include/stat.h | 1 + src/include/wiredtiger.in | 362 +++++++++++++++++++++++----------------------- src/support/stat.c | 4 + 5 files changed, 250 insertions(+), 201 deletions(-) diff --git a/dist/stat_data.py b/dist/stat_data.py index a4d92345f88..8fed3f3ac4a 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -206,6 +206,7 @@ connection_stats = [ CacheStat('cache_eviction_force', 'pages evicted because they exceeded the in-memory maximum'), CacheStat('cache_eviction_force_delete', 'pages evicted because they had chains of deleted items'), CacheStat('cache_eviction_force_fail', 'failed eviction of pages that exceeded the in-memory maximum'), + CacheStat('cache_eviction_force_retune', 'force re-tuning of eviction workers once in a while'), CacheStat('cache_eviction_get_ref', 'eviction calls to get a page'), CacheStat('cache_eviction_get_ref_empty', 'eviction calls to get a page found queue empty'), CacheStat('cache_eviction_get_ref_empty2', 'eviction calls to get a page found queue empty after locking'), diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index a957d245958..3ce35c60f2e 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -900,24 +900,32 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) } #define EVICT_TUNE_BATCH 1 /* Max workers to add each period */ -#define EVICT_TUNE_DATAPT_MIN 3 /* Data points needed before deciding - if we should keep adding workers or - settle on an earlier value. */ +/* + * Data points needed before deciding if we should keep adding workers or settle + * on an earlier value. + */ +#define EVICT_TUNE_DATAPT_MIN 3 #define EVICT_TUNE_PERIOD 1 /* Tune period in seconds */ +/* + * We will do a fresh re-tune every that many seconds to adjust to + * significant phase changes. + */ +#define EVICT_FORCE_RETUNE 30 + /* * __evict_tune_workers -- * Find the right number of eviction workers. 
Gradually ramp up the number of * workers increasing the number in batches indicated by the setting above. - * Store the number of workers that gave us the best throughput so far and - * the number of data points we have tried. + * Store the number of workers that gave us the best throughput so far and the + * number of data points we have tried. * - * Every once in a while when we have the minimum number of data points - * we check whether the eviction throughput achieved with the current number - * of workers is the best we have seen so far. If so, we will keep increasing - * the number of workers. If not, we are past the infliction point on the - * eviction throughput curve. In that case, we will set the number of workers - * to the best observed so far and settle into a stable state. + * Every once in a while when we have the minimum number of data points we check + * whether the eviction throughput achieved with the current number of workers + * is the best we have seen so far. If so, we will keep increasing the number of + * workers. If not, we are past the inflection point on the eviction throughput + * curve. In that case, we will set the number of workers to the best observed + * so far and settle into a stable state.
*/ static int __evict_tune_workers(WT_SESSION_IMPL *session) @@ -927,27 +935,60 @@ __evict_tune_workers(WT_SESSION_IMPL *session) WT_CONNECTION_IMPL *conn; WT_DECL_RET; uint64_t cur_threads, delta_msec, delta_pages, i, target_threads; - uint64_t pgs_evicted_cur, pgs_evicted_persec_cur; + uint64_t pgs_evicted_cur, pgs_evicted_persec_cur, time_diff; uint32_t thread_surplus; conn = S2C(session); cache = conn->cache; WT_ASSERT(session, conn->evict_threads.threads[0]->session == session); - - if (conn->evict_tune_stable) - return (0); + pgs_evicted_cur = pgs_evicted_persec_cur = 0; __wt_epoch(session, &current_time); + time_diff = WT_TIMEDIFF_SEC(current_time, conn->evict_tune_last_time); /* - * Every EVICT_TUNE_PERIOD seconds record the number of - * pages evicted per second observed in the previous period. + * If we have reached the stable state and have not run long enough to + * surpass the forced re-tuning threshold, return. */ - if (WT_TIMEDIFF_SEC( - current_time, conn->evict_tune_last_time) < EVICT_TUNE_PERIOD) - return (0); + if (conn->evict_tune_stable) { + if (time_diff < EVICT_FORCE_RETUNE) + return (0); + + /* + * Stable state was reached a long time ago. Let's re-tune. + * Reset all the state.
+ */ + conn->evict_tune_stable = 0; + conn->evict_tune_last_action_time.tv_sec = 0; + conn->evict_tune_pgs_last = 0; + conn->evict_tune_num_points = 0; + conn->evict_tune_pg_sec_max = 0; + conn->evict_tune_workers_best = 0; + + /* Reduce the number of eviction workers to the minimum */ + thread_surplus = conn->evict_threads.current_threads - + conn->evict_threads_min; + for (i = 0; i < thread_surplus; i++) { + WT_ERR(__wt_thread_group_stop_one( + session, &conn->evict_threads, false)); + WT_STAT_CONN_INCR(session, + cache_eviction_worker_removed); + } + WT_STAT_CONN_INCR(session, cache_eviction_force_retune); + } else + if (time_diff < EVICT_TUNE_PERIOD) + /* + * If we have not reached stable state, don't do + * anything unless enough time has passed since the last + * time we have taken any action in this function. + */ + return (0); + /* + * Measure the number of evicted pages so far. Eviction rate correlates + * to performance, so this is our metric of success. + */ pgs_evicted_cur = cache->pages_evict; /* @@ -1025,7 +1066,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_tune_stable = true; WT_STAT_CONN_SET(session, cache_eviction_active_workers, conn->evict_threads.current_threads); - return (0); + goto err; } } diff --git a/src/include/stat.h b/src/include/stat.h index ed3d588b7d3..bc7a7cab7ce 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -318,6 +318,7 @@ struct __wt_connection_stats { int64_t cache_eviction_force_fail; int64_t cache_eviction_walks_active; int64_t cache_eviction_walks_started; + int64_t cache_eviction_force_retune; int64_t cache_eviction_hazard; int64_t cache_hazard_checks; int64_t cache_hazard_walks; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 707159ef6ae..ced6df3d29d 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -4460,384 +4460,386 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1059 
/*! cache: files with new eviction walks started */ #define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1060 +/*! cache: force re-tuning of eviction workers once in a while */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_RETUNE 1061 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1061 +#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1062 /*! cache: hazard pointer check calls */ -#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1062 +#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1063 /*! cache: hazard pointer check entries walked */ -#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1063 +#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1064 /*! cache: hazard pointer maximum array length */ -#define WT_STAT_CONN_CACHE_HAZARD_MAX 1064 +#define WT_STAT_CONN_CACHE_HAZARD_MAX 1065 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1065 +#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1066 /*! cache: in-memory page splits */ -#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1066 +#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1067 /*! cache: internal pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1067 +#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1068 /*! cache: internal pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1068 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1069 /*! cache: leaf pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1069 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1070 /*! cache: lookaside table insert calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1070 +#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1071 /*! cache: lookaside table remove calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1071 +#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1072 /*! cache: maximum bytes configured */ -#define WT_STAT_CONN_CACHE_BYTES_MAX 1072 +#define WT_STAT_CONN_CACHE_BYTES_MAX 1073 /*! 
cache: maximum page size at eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1073 +#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1074 /*! cache: modified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1074 +#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1075 /*! cache: modified pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1075 +#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1076 /*! cache: overflow pages read into cache */ -#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1076 +#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1077 /*! cache: overflow values cached in memory */ -#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1077 +#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1078 /*! cache: page split during eviction deepened the tree */ -#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1078 +#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1079 /*! cache: page written requiring lookaside records */ -#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1079 +#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1080 /*! cache: pages currently held in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_INUSE 1080 +#define WT_STAT_CONN_CACHE_PAGES_INUSE 1081 /*! cache: pages evicted because they exceeded the in-memory maximum */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1081 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1082 /*! cache: pages evicted because they had chains of deleted items */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1082 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1083 /*! cache: pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP 1083 +#define WT_STAT_CONN_CACHE_EVICTION_APP 1084 /*! cache: pages queued for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1084 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1085 /*! 
cache: pages queued for urgent eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1085 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1086 /*! cache: pages queued for urgent eviction during walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1086 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1087 /*! cache: pages read into cache */ -#define WT_STAT_CONN_CACHE_READ 1087 +#define WT_STAT_CONN_CACHE_READ 1088 /*! cache: pages read into cache requiring lookaside entries */ -#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1088 +#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1089 /*! cache: pages requested from the cache */ -#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1089 +#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1090 /*! cache: pages seen by eviction walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1090 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1091 /*! cache: pages selected for eviction unable to be evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1091 +#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1092 /*! cache: pages walked for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK 1092 +#define WT_STAT_CONN_CACHE_EVICTION_WALK 1093 /*! cache: pages written from cache */ -#define WT_STAT_CONN_CACHE_WRITE 1093 +#define WT_STAT_CONN_CACHE_WRITE 1094 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1094 +#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1095 /*! cache: percentage overhead */ -#define WT_STAT_CONN_CACHE_OVERHEAD 1095 +#define WT_STAT_CONN_CACHE_OVERHEAD 1096 /*! cache: tracked bytes belonging to internal pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1096 +#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1097 /*! cache: tracked bytes belonging to leaf pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_LEAF 1097 +#define WT_STAT_CONN_CACHE_BYTES_LEAF 1098 /*! 
cache: tracked dirty bytes in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1098 +#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1099 /*! cache: tracked dirty pages in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1099 +#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1100 /*! cache: unmodified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1100 +#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1101 /*! connection: auto adjusting condition resets */ -#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1101 +#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1102 /*! connection: auto adjusting condition wait calls */ -#define WT_STAT_CONN_COND_AUTO_WAIT 1102 +#define WT_STAT_CONN_COND_AUTO_WAIT 1103 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1103 +#define WT_STAT_CONN_FILE_OPEN 1104 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1104 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1105 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1105 +#define WT_STAT_CONN_MEMORY_FREE 1106 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1106 +#define WT_STAT_CONN_MEMORY_GROW 1107 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1107 +#define WT_STAT_CONN_COND_WAIT 1108 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1108 +#define WT_STAT_CONN_RWLOCK_READ 1109 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1109 +#define WT_STAT_CONN_RWLOCK_WRITE 1110 /*! connection: total fsync I/Os */ -#define WT_STAT_CONN_FSYNC_IO 1110 +#define WT_STAT_CONN_FSYNC_IO 1111 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1111 +#define WT_STAT_CONN_READ_IO 1112 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1112 +#define WT_STAT_CONN_WRITE_IO 1113 /*! 
cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1113 +#define WT_STAT_CONN_CURSOR_CREATE 1114 /*! cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1114 +#define WT_STAT_CONN_CURSOR_INSERT 1115 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1115 +#define WT_STAT_CONN_CURSOR_NEXT 1116 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1116 +#define WT_STAT_CONN_CURSOR_PREV 1117 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1117 +#define WT_STAT_CONN_CURSOR_REMOVE 1118 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1118 +#define WT_STAT_CONN_CURSOR_RESET 1119 /*! cursor: cursor restarted searches */ -#define WT_STAT_CONN_CURSOR_RESTART 1119 +#define WT_STAT_CONN_CURSOR_RESTART 1120 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1120 +#define WT_STAT_CONN_CURSOR_SEARCH 1121 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1121 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1122 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1122 +#define WT_STAT_CONN_CURSOR_UPDATE 1123 /*! cursor: truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1123 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1124 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1124 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1125 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1125 +#define WT_STAT_CONN_DH_SWEEP_REF 1126 /*! data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1126 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1127 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1127 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1128 /*! 
data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1128 +#define WT_STAT_CONN_DH_SWEEP_TOD 1129 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1129 +#define WT_STAT_CONN_DH_SWEEPS 1130 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1130 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1131 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1131 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1132 /*! lock: checkpoint lock acquisitions */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1132 +#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1133 /*! lock: checkpoint lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1133 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1134 /*! lock: checkpoint lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1134 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1135 /*! lock: handle-list lock eviction thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1135 +#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1136 /*! lock: metadata lock acquisitions */ -#define WT_STAT_CONN_LOCK_METADATA_COUNT 1136 +#define WT_STAT_CONN_LOCK_METADATA_COUNT 1137 /*! lock: metadata lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1137 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1138 /*! lock: metadata lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1138 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1139 /*! lock: schema lock acquisitions */ -#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1139 +#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1140 /*! lock: schema lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1140 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1141 /*! 
lock: schema lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1141 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1142 /*! lock: table lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_COUNT 1142 +#define WT_STAT_CONN_LOCK_TABLE_COUNT 1143 /*! * lock: table lock application thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1143 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1144 /*! * lock: table lock internal thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1144 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1145 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1145 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1146 /*! log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1146 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1147 /*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1147 +#define WT_STAT_CONN_LOG_SLOT_RACES 1148 /*! log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1148 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1149 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1149 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1150 /*! log: consolidated slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1150 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1151 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1151 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1152 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1152 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1153 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1153 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1154 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1154 +#define WT_STAT_CONN_LOG_FLUSH 1155 /*! 
log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1155 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1156 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1156 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1157 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1157 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1158 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1158 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1159 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1159 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1160 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1160 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1161 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1161 +#define WT_STAT_CONN_LOG_SCANS 1162 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1162 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1163 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1163 +#define WT_STAT_CONN_LOG_WRITE_LSN 1164 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1164 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1165 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1165 +#define WT_STAT_CONN_LOG_SYNC 1166 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1166 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1167 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1167 +#define WT_STAT_CONN_LOG_SYNC_DIR 1168 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1168 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1169 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1169 +#define WT_STAT_CONN_LOG_WRITES 1170 /*! 
log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1170 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1171 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1171 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1172 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1172 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1173 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1173 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1174 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1174 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1175 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1175 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1176 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1176 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1177 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1177 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1178 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1178 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1179 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1179 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1180 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1180 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1181 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1181 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1182 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1182 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1183 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1183 +#define WT_STAT_CONN_REC_PAGES 1184 /*! 
reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1184 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1185 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1185 +#define WT_STAT_CONN_REC_PAGE_DELETE 1186 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1186 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1187 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1187 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1188 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1188 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1189 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1189 +#define WT_STAT_CONN_SESSION_OPEN 1190 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1190 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1191 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1191 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1192 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1192 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1193 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1193 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1194 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1194 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1195 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1195 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1196 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1196 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1197 /*! 
session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1197 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1198 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1198 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1199 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1199 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1200 /*! session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1200 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1201 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1201 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1202 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1202 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1203 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1203 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1204 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1204 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1205 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1205 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1206 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1206 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1207 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1207 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1208 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1208 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1209 /*! 
thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1209 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1210 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1210 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1211 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1211 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1212 /*! thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1212 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1213 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1213 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1214 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1214 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1215 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1215 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1216 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1216 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1217 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1217 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1218 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1218 +#define WT_STAT_CONN_PAGE_SLEEP 1219 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1219 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1220 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1220 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1221 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1221 +#define WT_STAT_CONN_TXN_BEGIN 1222 /*! 
transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1222 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1223 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1223 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1224 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1224 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1225 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1225 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1226 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1226 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1227 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1227 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1228 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1228 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1229 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1229 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1230 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1230 +#define WT_STAT_CONN_TXN_CHECKPOINT 1231 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1231 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1232 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1232 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1233 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1233 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1234 /*! 
* transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1234 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1235 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1235 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1236 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1236 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1237 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1237 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1238 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1238 +#define WT_STAT_CONN_TXN_SYNC 1239 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1239 +#define WT_STAT_CONN_TXN_COMMIT 1240 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1240 +#define WT_STAT_CONN_TXN_ROLLBACK 1241 /*! 
* @} diff --git a/src/support/stat.c b/src/support/stat.c index fd38e1b79ee..57c1ee06000 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -685,6 +685,7 @@ static const char * const __stats_connection_desc[] = { "cache: failed eviction of pages that exceeded the in-memory maximum", "cache: files with active eviction walks", "cache: files with new eviction walks started", + "cache: force re-tuning of eviction workers once in a while", "cache: hazard pointer blocked page eviction", "cache: hazard pointer check calls", "cache: hazard pointer check entries walked", @@ -968,6 +969,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_force_fail = 0; /* not clearing cache_eviction_walks_active */ stats->cache_eviction_walks_started = 0; + stats->cache_eviction_force_retune = 0; stats->cache_eviction_hazard = 0; stats->cache_hazard_checks = 0; stats->cache_hazard_walks = 0; @@ -1252,6 +1254,8 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, cache_eviction_walks_active); to->cache_eviction_walks_started += WT_STAT_READ(from, cache_eviction_walks_started); + to->cache_eviction_force_retune += + WT_STAT_READ(from, cache_eviction_force_retune); to->cache_eviction_hazard += WT_STAT_READ(from, cache_eviction_hazard); to->cache_hazard_checks += WT_STAT_READ(from, cache_hazard_checks); -- cgit v1.2.1 From 423f4e11050f7644b1a8d2b6b1cc60c35ef915c8 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Thu, 30 Mar 2017 21:01:05 -0400 Subject: WT-3243 Reorder log slot release so joins don't wait on IO (#3360) --- dist/stat_data.py | 2 + src/include/extern.h | 1 - src/include/log.h | 3 +- src/include/stat.h | 2 + src/include/wiredtiger.in | 192 ++++++++++++++++++++++--------------------- src/log/log_slot.c | 205 ++++++++++++++++++++++++---------------------- src/support/stat.c | 8 ++ 7 files changed, 221 insertions(+), 192 deletions(-) diff --git a/dist/stat_data.py b/dist/stat_data.py index 8fed3f3ac4a..ac79ffd029a 100644 --- 
a/dist/stat_data.py +++ b/dist/stat_data.py @@ -324,10 +324,12 @@ connection_stats = [ LogStat('log_scan_records', 'records processed by log scan'), LogStat('log_scan_rereads', 'log scan records requiring two reads'), LogStat('log_scans', 'log scan operations'), + LogStat('log_slot_active_closed', 'consolidated slot join active slot closed'), LogStat('log_slot_closes', 'consolidated slot closures'), LogStat('log_slot_coalesced', 'written slots coalesced'), LogStat('log_slot_consolidated', 'logging bytes consolidated', 'size'), LogStat('log_slot_joins', 'consolidated slot joins'), + LogStat('log_slot_no_free_slots', 'consolidated slot transitions unable to find free slot'), LogStat('log_slot_races', 'consolidated slot join races'), LogStat('log_slot_switch_busy', 'busy returns attempting to switch slots'), LogStat('log_slot_transitions', 'consolidated slot join transitions'), diff --git a/src/include/extern.h b/src/include/extern.h index 47b4e03a7b7..c0a6087e9b1 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -405,7 +405,6 @@ extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_switch( WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_slot_new(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/log.h b/src/include/log.h index f0999ba316b..fb3c961417f 100644 --- a/src/include/log.h +++ b/src/include/log.h @@ -193,7 +193,8 @@ struct __wt_myslot { wt_off_t end_offset; /* My end offset in buffer */ wt_off_t offset; /* Slot buffer offset */ #define WT_MYSLOT_CLOSE 0x01 /* This thread is closing the slot */ -#define WT_MYSLOT_UNBUFFERED 0x02 /* Write directly */ +#define WT_MYSLOT_NEEDS_RELEASE 0x02 /* This thread is releasing the slot */ +#define WT_MYSLOT_UNBUFFERED 0x04 /* Write directly */ uint32_t flags; /* Flags */ }; diff --git a/src/include/stat.h b/src/include/stat.h index bc7a7cab7ce..6c274484bcb 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -405,9 +405,11 @@ struct __wt_connection_stats { int64_t lock_table_wait_internal; int64_t log_slot_switch_busy; int64_t log_slot_closes; + int64_t log_slot_active_closed; int64_t log_slot_races; int64_t log_slot_transitions; int64_t log_slot_joins; + int64_t log_slot_no_free_slots; int64_t log_slot_unbuffered; int64_t log_bytes_payload; int64_t log_bytes_written; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index ced6df3d29d..ddecb2ac765 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -4640,206 +4640,210 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1146 /*! log: consolidated slot closures */ #define WT_STAT_CONN_LOG_SLOT_CLOSES 1147 +/*! log: consolidated slot join active slot closed */ +#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1148 /*! 
log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1148 +#define WT_STAT_CONN_LOG_SLOT_RACES 1149 /*! log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1149 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1150 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1150 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1151 +/*! log: consolidated slot transitions unable to find free slot */ +#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1152 /*! log: consolidated slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1151 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1153 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1152 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1154 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1153 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1155 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1154 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1156 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1155 +#define WT_STAT_CONN_LOG_FLUSH 1157 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1156 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1158 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1157 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1159 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1158 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1160 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1159 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1161 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1160 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1162 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1161 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1163 /*! 
log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1162 +#define WT_STAT_CONN_LOG_SCANS 1164 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1163 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1165 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1164 +#define WT_STAT_CONN_LOG_WRITE_LSN 1166 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1165 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1167 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1166 +#define WT_STAT_CONN_LOG_SYNC 1168 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1167 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1169 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1168 +#define WT_STAT_CONN_LOG_SYNC_DIR 1170 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1169 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1171 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1170 +#define WT_STAT_CONN_LOG_WRITES 1172 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1171 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1173 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1172 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1174 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1173 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1175 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1174 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1176 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1175 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1177 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1176 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1178 /*! 
log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1177 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1179 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1178 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1180 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1179 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1181 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1180 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1182 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1181 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1183 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1182 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1184 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1183 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1185 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1184 +#define WT_STAT_CONN_REC_PAGES 1186 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1185 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1187 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1186 +#define WT_STAT_CONN_REC_PAGE_DELETE 1188 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1187 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1189 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1188 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1190 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1189 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1191 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1190 +#define WT_STAT_CONN_SESSION_OPEN 1192 /*! 
session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1191 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1193 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1192 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1194 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1193 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1195 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1194 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1196 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1195 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1197 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1196 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1198 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1197 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1199 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1198 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1200 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1199 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1201 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1200 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1202 /*! session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1201 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1203 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1202 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1204 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1203 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1205 /*! 
session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1204 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1206 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1205 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1207 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1206 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1208 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1207 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1209 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1208 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1210 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1209 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1211 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1210 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1212 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1211 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1213 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1212 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1214 /*! thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1213 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1215 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1214 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1216 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1215 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1217 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1216 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1218 /*! 
thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1217 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1219 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1218 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1220 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1219 +#define WT_STAT_CONN_PAGE_SLEEP 1221 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1220 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1222 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1221 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1223 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1222 +#define WT_STAT_CONN_TXN_BEGIN 1224 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1223 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1225 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1224 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1226 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1225 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1227 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1226 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1228 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1227 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1229 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1228 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1230 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1229 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1231 /*! 
transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1230 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1232 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1231 +#define WT_STAT_CONN_TXN_CHECKPOINT 1233 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1232 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1234 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1233 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1235 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1234 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1236 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1235 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1237 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1236 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1238 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1237 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1239 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1238 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1240 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1239 +#define WT_STAT_CONN_TXN_SYNC 1241 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1240 +#define WT_STAT_CONN_TXN_COMMIT 1242 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1241 +#define WT_STAT_CONN_TXN_ROLLBACK 1243 /*! 
* @} diff --git a/src/log/log_slot.c b/src/log/log_slot.c index c685aec3ffc..512a84dbd13 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -195,103 +195,12 @@ retry: } /* - * __log_slot_switch_internal -- - * Switch out the current slot and set up a new one. - */ -static int -__log_slot_switch_internal( - WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool forced) -{ - WT_DECL_RET; - WT_LOG *log; - WT_LOGSLOT *slot; - bool free_slot, release; - - log = S2C(session)->log; - release = false; - slot = myslot->slot; - - WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT)); - - /* - * If someone else raced us to closing this specific slot, we're - * done here. - */ - if (slot != log->active_slot) - return (0); - - WT_RET(WT_SESSION_CHECK_PANIC(session)); - /* - * We may come through here multiple times if we were able to close - * a slot but could not set up a new one. If we closed it already, - * don't try to do it again but still set up the new slot. - */ - if (!F_ISSET(myslot, WT_MYSLOT_CLOSE)) { - ret = __log_slot_close(session, slot, &release, forced); - /* - * If close returns WT_NOTFOUND it means that someone else - * is processing the slot change. - */ - if (ret == WT_NOTFOUND) - return (0); - WT_RET(ret); - if (release) { - WT_RET(__wt_log_release(session, slot, &free_slot)); - if (free_slot) - __wt_log_slot_free(session, slot); - } - } - /* - * Set that we have closed this slot because we may call in here - * multiple times if we retry creating a new slot. - */ - F_SET(myslot, WT_MYSLOT_CLOSE); - WT_RET(__wt_log_slot_new(session)); - F_CLR(myslot, WT_MYSLOT_CLOSE); - return (0); -} - -/* - * __wt_log_slot_switch -- - * Switch out the current slot and set up a new one. - */ -int -__wt_log_slot_switch( - WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced) -{ - WT_DECL_RET; - WT_LOG *log; - - log = S2C(session)->log; - /* - * !!! 
Since the WT_WITH_SLOT_LOCK macro is a do-while loop, the - * compiler does not like it combined directly with the while loop - * here. - * - * The loop conditional is a bit complex. We have to retry if we - * closed the slot but were unable to set up a new slot. In that - * case the flag indicating we have closed the slot will still be set. - * We have to retry in that case regardless of the retry setting - * because we are responsible for setting up the new slot. - */ - do { - WT_WITH_SLOT_LOCK(session, log, - ret = __log_slot_switch_internal(session, myslot, forced)); - if (ret == EBUSY) { - WT_STAT_CONN_INCR(session, log_slot_switch_busy); - __wt_yield(); - } - } while (F_ISSET(myslot, WT_MYSLOT_CLOSE) || (retry && ret == EBUSY)); - return (ret); -} - -/* - * __wt_log_slot_new -- + * __log_slot_new -- * Find a free slot and switch it as the new active slot. * Must be called holding the slot lock. */ -int -__wt_log_slot_new(WT_SESSION_IMPL *session) +static int +__log_slot_new(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; WT_LOG *log; @@ -351,6 +260,7 @@ __wt_log_slot_new(WT_SESSION_IMPL *session) /* * If we didn't find any free slots signal the worker thread. */ + WT_STAT_CONN_INCR(session, log_slot_no_free_slots); __wt_cond_signal(session, conn->log_wrlsn_cond); __wt_yield(); #ifdef HAVE_DIAGNOSTIC @@ -370,6 +280,108 @@ __wt_log_slot_new(WT_SESSION_IMPL *session) /* NOTREACHED */ } +/* + * __log_slot_switch_internal -- + * Switch out the current slot and set up a new one. + */ +static int +__log_slot_switch_internal( + WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool forced) +{ + WT_DECL_RET; + WT_LOG *log; + WT_LOGSLOT *slot; + bool free_slot, release; + + log = S2C(session)->log; + release = false; + slot = myslot->slot; + + WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT)); + + /* + * If someone else raced us to closing this specific slot, we're + * done here. 
+ */ + if (slot != log->active_slot) + return (0); + WT_RET(WT_SESSION_CHECK_PANIC(session)); + + /* + * We may come through here multiple times if we were not able to + * set up a new one. If we closed it already, + * don't try to do it again but still set up the new slot. + */ + if (!F_ISSET(myslot, WT_MYSLOT_CLOSE)) { + ret = __log_slot_close(session, slot, &release, forced); + /* + * If close returns WT_NOTFOUND it means that someone else + * is processing the slot change. + */ + if (ret == WT_NOTFOUND) + return (0); + WT_RET(ret); + /* + * Set that we have closed this slot because we may call in here + * multiple times if we retry creating a new slot. Similarly + * set retain whether this slot needs releasing so that we don't + * lose that information if we retry. + */ + F_SET(myslot, WT_MYSLOT_CLOSE); + if (release) + F_SET(myslot, WT_MYSLOT_NEEDS_RELEASE); + } + /* + * Now that the slot is closed, set up a new one so that joining + * threads don't have to wait on writing the previous slot if we + * release it. Release after setting a new one. + */ + WT_RET(__log_slot_new(session)); + F_CLR(myslot, WT_MYSLOT_CLOSE); + if (F_ISSET(myslot, WT_MYSLOT_NEEDS_RELEASE)) { + WT_RET(__wt_log_release(session, slot, &free_slot)); + F_CLR(myslot, WT_MYSLOT_NEEDS_RELEASE); + if (free_slot) + __wt_log_slot_free(session, slot); + } + return (ret); +} + +/* + * __wt_log_slot_switch -- + * Switch out the current slot and set up a new one. + */ +int +__wt_log_slot_switch( + WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced) +{ + WT_DECL_RET; + WT_LOG *log; + + log = S2C(session)->log; + + /* + * !!! Since the WT_WITH_SLOT_LOCK macro is a do-while loop, the + * compiler does not like it combined directly with the while loop + * here. + * + * The loop conditional is a bit complex. We have to retry if we + * closed the slot but were unable to set up a new slot. In that + * case the flag indicating we have closed the slot will still be set. 
+ * We have to retry in that case regardless of the retry setting + * because we are responsible for setting up the new slot. + */ + do { + WT_WITH_SLOT_LOCK(session, log, + ret = __log_slot_switch_internal(session, myslot, forced)); + if (ret == EBUSY) { + WT_STAT_CONN_INCR(session, log_slot_switch_busy); + __wt_yield(); + } + } while (F_ISSET(myslot, WT_MYSLOT_CLOSE) || (retry && ret == EBUSY)); + return (ret); +} + /* * __wt_log_slot_init -- * Initialize the slot array. @@ -531,12 +543,13 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, if (__wt_atomic_casiv64( &slot->slot_state, old_state, new_state)) break; - } + WT_STAT_CONN_INCR(session, log_slot_races); + } else + WT_STAT_CONN_INCR(session, log_slot_active_closed); /* * The slot is no longer open or we lost the race to * update it. Yield and try again. */ - WT_STAT_CONN_INCR(session, log_slot_races); __wt_yield(); } /* diff --git a/src/support/stat.c b/src/support/stat.c index 57c1ee06000..2c2217f8c20 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -772,9 +772,11 @@ static const char * const __stats_connection_desc[] = { "lock: table lock internal thread time waiting for the table lock (usecs)", "log: busy returns attempting to switch slots", "log: consolidated slot closures", + "log: consolidated slot join active slot closed", "log: consolidated slot join races", "log: consolidated slot join transitions", "log: consolidated slot joins", + "log: consolidated slot transitions unable to find free slot", "log: consolidated slot unbuffered writes", "log: log bytes of payload data", "log: log bytes written", @@ -1056,9 +1058,11 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->lock_table_wait_internal = 0; stats->log_slot_switch_busy = 0; stats->log_slot_closes = 0; + stats->log_slot_active_closed = 0; stats->log_slot_races = 0; stats->log_slot_transitions = 0; stats->log_slot_joins = 0; + stats->log_slot_no_free_slots = 0; stats->log_slot_unbuffered = 0; 
stats->log_bytes_payload = 0; stats->log_bytes_written = 0; @@ -1370,9 +1374,13 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, lock_table_wait_internal); to->log_slot_switch_busy += WT_STAT_READ(from, log_slot_switch_busy); to->log_slot_closes += WT_STAT_READ(from, log_slot_closes); + to->log_slot_active_closed += + WT_STAT_READ(from, log_slot_active_closed); to->log_slot_races += WT_STAT_READ(from, log_slot_races); to->log_slot_transitions += WT_STAT_READ(from, log_slot_transitions); to->log_slot_joins += WT_STAT_READ(from, log_slot_joins); + to->log_slot_no_free_slots += + WT_STAT_READ(from, log_slot_no_free_slots); to->log_slot_unbuffered += WT_STAT_READ(from, log_slot_unbuffered); to->log_bytes_payload += WT_STAT_READ(from, log_bytes_payload); to->log_bytes_written += WT_STAT_READ(from, log_bytes_written); -- cgit v1.2.1 From 871889c0b87dcd2560704248eba0a4b119ca26f1 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Tue, 4 Apr 2017 00:31:05 +1000 Subject: WT-3250 Fix spinlock statistics tracking on Windows. (#3363) A MongoDB user on Windows noticed the "LSM: application work units currently queued" statistic was changing in a configuration that involved no LSM code. This was caused by a bug in code that tracks time spent in spinlocks incrementing the wrong statistic. In particular, spinlocks contain fields describing which statistics should be used to track time spent in that spinlock. A value of -1 indicates that the spinlock should not be tracked, but a value of zero is the first statistic in the array for a connection, which happens to be the "LSM: application work units currently queued" statistic. The Windows implementation of spinlocks was not setting these fields to -1, leading to the bug. This bug was introduced by WT-2955 and also meant that every WiredTiger spinlock on Windows was being timed, which may have negatively impacted Windows performance.
--- src/include/mutex.i | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/include/mutex.i b/src/include/mutex.i index 640706284c3..eb95d76a1a2 100644 --- a/src/include/mutex.i +++ b/src/include/mutex.i @@ -32,7 +32,9 @@ __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) WT_UNUSED(name); t->lock = 0; + t->name = name; t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1; + t->initialized = 1; return (0); } @@ -196,6 +198,7 @@ __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) } t->name = name; + t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1; t->initialized = 1; return (0); } -- cgit v1.2.1 From e8efd76093d126a8d7b8e21c650123e96e9d6f13 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Mon, 3 Apr 2017 12:51:14 -0400 Subject: WT-3250 Have one function initializing the WT portion of the spinlock. (#3364) Unify spinlock structures. --- src/include/mutex.h | 28 ++++------------------------ src/include/mutex.i | 26 +++++++++++++++----------- 2 files changed, 19 insertions(+), 35 deletions(-) diff --git a/src/include/mutex.h b/src/include/mutex.h index 06b8c4a3304..910eb7af5b9 100644 --- a/src/include/mutex.h +++ b/src/include/mutex.h @@ -62,31 +62,17 @@ union __wt_rwlock { /* Read/write lock */ #define SPINLOCK_PTHREAD_MUTEX 2 #define SPINLOCK_PTHREAD_MUTEX_ADAPTIVE 3 -#if SPINLOCK_TYPE == SPINLOCK_GCC - struct __wt_spinlock { WT_CACHE_LINE_PAD_BEGIN +#if SPINLOCK_TYPE == SPINLOCK_GCC volatile int lock; - - /* - * We track acquisitions and time spent waiting for some locks. For - * performance reasons and to make it possible to write generic code - * that tracks statistics for different locks, we store the offset - * of the statistics fields to be updated during lock acquisition. 
- */ - int16_t stat_count_off; /* acquisitions offset */ - int16_t stat_app_usecs_off; /* waiting application threads offset */ - int16_t stat_int_usecs_off; /* waiting server threads offset */ - WT_CACHE_LINE_PAD_END -}; - #elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE ||\ SPINLOCK_TYPE == SPINLOCK_MSVC - -struct __wt_spinlock { - WT_CACHE_LINE_PAD_BEGIN wt_mutex_t lock; +#else +#error Unknown spinlock type +#endif const char *name; /* Mutex name */ @@ -103,9 +89,3 @@ struct __wt_spinlock { int8_t initialized; /* Lock initialized, for cleanup */ WT_CACHE_LINE_PAD_END }; - -#else - -#error Unknown spinlock type - -#endif diff --git a/src/include/mutex.i b/src/include/mutex.i index eb95d76a1a2..2d483972ed2 100644 --- a/src/include/mutex.i +++ b/src/include/mutex.i @@ -14,6 +14,18 @@ * of instructions. */ +/* + * __spin_init_internal -- + * Initialize the WT portion of a spinlock. + */ +static inline void +__spin_init_internal(WT_SPINLOCK *t, const char *name) +{ + t->name = name; + t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1; + t->initialized = 1; +} + #if SPINLOCK_TYPE == SPINLOCK_GCC /* Default to spinning 1000 times before yielding. 
*/ @@ -29,12 +41,9 @@ static inline int __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) { WT_UNUSED(session); - WT_UNUSED(name); t->lock = 0; - t->name = name; - t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1; - t->initialized = 1; + __spin_init_internal(t, name); return (0); } @@ -112,10 +121,7 @@ __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) #else WT_RET(pthread_mutex_init(&t->lock, NULL)); #endif - - t->name = name; - t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1; - t->initialized = 1; + __spin_init_internal(t, name); WT_UNUSED(session); return (0); @@ -197,9 +203,7 @@ __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name) return (__wt_map_windows_error(windows_error)); } - t->name = name; - t->stat_count_off = t->stat_app_usecs_off = t->stat_int_usecs_off = -1; - t->initialized = 1; + __spin_init_internal(t, name); return (0); } -- cgit v1.2.1 From 27b483289376e8441da87723a5b6a2ec420ad858 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Tue, 4 Apr 2017 15:10:52 -0400 Subject: WT-3254 Fix typo in reconfig string (#3366) --- test/suite/test_reconfig02.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/suite/test_reconfig02.py b/test/suite/test_reconfig02.py index 8054b2a6ab5..042d3bbe71f 100644 --- a/test/suite/test_reconfig02.py +++ b/test/suite/test_reconfig02.py @@ -62,7 +62,7 @@ class test_reconfig02(wttest.WiredTigerTestCase): self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.conn.reconfigure("log=(path=foo)"), msg) self.assertRaisesWithMessage(wiredtiger.WiredTigerError, - lambda: self.conn.reconfigure("log=(recovery=true)"), msg) + lambda: self.conn.reconfigure("log=(recover=true)"), msg) # Logging starts on, but prealloc is off. Verify it is off. 
# Reconfigure it on and run again, making sure that log files -- cgit v1.2.1 From adbe2ec5cd6dc2da2af913087b53c402b2f0b87c Mon Sep 17 00:00:00 2001 From: sueloverso Date: Tue, 4 Apr 2017 15:48:22 -0400 Subject: WT-3249 Look at slot_state during force while holding lock. (#3365) We could race an in-progress switch that set a new, empty active slot but has not yet released the previously active slot and get an incorrect LSN. --- src/include/extern.h | 2 +- src/log/log.c | 12 ++---------- src/log/log_slot.c | 22 ++++++++++++++++++---- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/include/extern.h b/src/include/extern.h index c0a6087e9b1..55ba1bada7c 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -404,7 +404,7 @@ extern int __wt_logop_row_truncate_unpack( WT_SESSION_IMPL *session, const uint8 extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_slot_switch( WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_log_slot_switch(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern 
int __wt_log_slot_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/log/log.c b/src/log/log.c index 5b24250fffc..803d3e8dfab 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -1919,7 +1919,6 @@ __wt_log_force_write(WT_SESSION_IMPL *session, bool retry, bool *did_work) { WT_LOG *log; WT_MYSLOT myslot; - uint32_t joined; log = S2C(session)->log; memset(&myslot, 0, sizeof(myslot)); @@ -1927,14 +1926,7 @@ __wt_log_force_write(WT_SESSION_IMPL *session, bool retry, bool *did_work) if (did_work != NULL) *did_work = true; myslot.slot = log->active_slot; - joined = WT_LOG_SLOT_JOINED(log->active_slot->slot_state); - if (joined == 0) { - WT_STAT_CONN_INCR(session, log_force_write_skip); - if (did_work != NULL) - *did_work = false; - return (0); - } - return (__wt_log_slot_switch(session, &myslot, retry, true)); + return (__wt_log_slot_switch(session, &myslot, retry, true, did_work)); } /* @@ -2146,7 +2138,7 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, ret = 0; if (myslot.end_offset >= WT_LOG_SLOT_BUF_MAX || F_ISSET(&myslot, WT_MYSLOT_UNBUFFERED) || force) - ret = __wt_log_slot_switch(session, &myslot, true, false); + ret = __wt_log_slot_switch(session, &myslot, true, false, NULL); if (ret == 0) ret = __log_fill(session, &myslot, false, record, &lsn); release_size = __wt_log_slot_release( diff --git a/src/log/log_slot.c b/src/log/log_slot.c index 512a84dbd13..97e317ce68c 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -286,12 +286,13 @@ __log_slot_new(WT_SESSION_IMPL *session) */ static int __log_slot_switch_internal( - WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool forced) + WT_SESSION_IMPL *session, WT_MYSLOT *myslot, 
bool forced, bool *did_work) { WT_DECL_RET; WT_LOG *log; WT_LOGSLOT *slot; bool free_slot, release; + uint32_t joined; log = S2C(session)->log; release = false; @@ -305,6 +306,18 @@ __log_slot_switch_internal( */ if (slot != log->active_slot) return (0); + /* + * If the current active slot is unused and this is a forced switch, + * we're done. If this is a non-forced switch we always switch + * because the slot could be part of an unbuffered operation. + */ + joined = WT_LOG_SLOT_JOINED(slot->slot_state); + if (joined == 0 && forced) { + WT_STAT_CONN_INCR(session, log_force_write_skip); + if (did_work != NULL) + *did_work = false; + return (0); + } WT_RET(WT_SESSION_CHECK_PANIC(session)); /* @@ -352,8 +365,8 @@ __log_slot_switch_internal( * Switch out the current slot and set up a new one. */ int -__wt_log_slot_switch( - WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced) +__wt_log_slot_switch(WT_SESSION_IMPL *session, + WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work) { WT_DECL_RET; WT_LOG *log; @@ -373,7 +386,8 @@ __wt_log_slot_switch( */ do { WT_WITH_SLOT_LOCK(session, log, - ret = __log_slot_switch_internal(session, myslot, forced)); + ret = __log_slot_switch_internal( + session, myslot, forced, did_work)); if (ret == EBUSY) { WT_STAT_CONN_INCR(session, log_slot_switch_busy); __wt_yield(); -- cgit v1.2.1 From d924d58f8b3becf2ecb8ace720da9e5ac6645568 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Tue, 4 Apr 2017 23:43:10 -0400 Subject: WT-3247 Test should exit instead of abort to avoid a core dump. 
(#3367) --- test/recovery/truncated-log.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/test/recovery/truncated-log.c b/test/recovery/truncated-log.c index a127d8c1c63..89e282bbb96 100644 --- a/test/recovery/truncated-log.c +++ b/test/recovery/truncated-log.c @@ -130,7 +130,7 @@ usage(void) /* * Child process creates the database and table, and then writes data into - * the table until it is killed by the parent. + * the table until it switches into log file 2. */ static void fill_db(void) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); @@ -246,7 +246,7 @@ fill_db(void) } if (fclose(fp) != 0) testutil_die(errno, "fclose"); - abort(); + exit(0); /* NOTREACHED */ } @@ -286,9 +286,7 @@ main(int argc, char *argv[]) testutil_make_work_dir(home); /* - * Fork a child to insert as many items. We will then randomly - * kill the child, run recovery and make sure all items we wrote - * exist after recovery runs. + * Fork a child to do its work. Wait for it to exit. */ if ((pid = fork()) < 0) testutil_die(errno, "fork"); -- cgit v1.2.1 From 963624a0c6a63419199e144526cd6912d3303511 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 4 Apr 2017 23:54:41 -0400 Subject: WT-3076 Add a general-purpose epoch manager (#3362) * Switch the table schema generation numbers over to the general-purpose generations manager. * Switch the btree split generation numbers over to the general-purpose generations manager. * Add replaced hazard pointer arrays to the session's stash for later free (__wt_txn_release() and all functions that release session resources will free old hazard pointer arrays where possible). * The connection's free-on-close functionality is no longer used outside of the config code, make it private again. * To convert hazard pointers to the standard generation code, we have to add enter/leave segments when looking at other sessions' hazard pointer arrays.
(The only paths freeing hazard pointer memory are in the context of the thread that "owns" the memory, so there's no need to enter the hazard pointer resource generation when the thread that owns the hazard pointer memory is examining it, it can't be freed from underneath that thread.) --- dist/filelist | 1 + src/btree/bt_split.c | 156 ++----------------- src/config/config_api.c | 18 +-- src/conn/conn_handle.c | 11 +- src/conn/conn_open.c | 2 +- src/conn/conn_stat.c | 4 +- src/evict/evict_lru.c | 3 +- src/include/btmem.h | 17 +-- src/include/btree.i | 4 +- src/include/connection.h | 9 +- src/include/extern.h | 14 +- src/include/schema.h | 4 +- src/include/session.h | 40 ++--- src/include/txn.h | 1 - src/include/txn.i | 2 +- src/include/wt_internal.h | 6 +- src/reconcile/rec_write.c | 9 +- src/schema/schema_alter.c | 2 +- src/schema/schema_drop.c | 2 +- src/schema/schema_list.c | 3 +- src/schema/schema_open.c | 2 +- src/schema/schema_rename.c | 2 +- src/session/session_api.c | 3 + src/session/session_compact.c | 4 +- src/support/generation.c | 346 ++++++++++++++++++++++++++++++++++++++++++ src/support/hazard.c | 30 +++- src/txn/txn.c | 10 +- src/txn/txn_ckpt.c | 16 +- 28 files changed, 476 insertions(+), 245 deletions(-) create mode 100644 src/support/generation.c diff --git a/dist/filelist b/dist/filelist index 5a3348b940a..6b6e617c4b1 100644 --- a/dist/filelist +++ b/dist/filelist @@ -179,6 +179,7 @@ src/session/session_salvage.c src/support/cond_auto.c src/support/crypto.c src/support/err.c +src/support/generation.c src/support/global.c src/support/hash_city.c src/support/hash_fnv.c diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 49043c8bab4..6f03da161cd 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -30,143 +30,6 @@ typedef enum { WT_ERR_RETURN /* Clean up and return error */ } WT_SPLIT_ERROR_PHASE; -/* - * __split_oldest_gen -- - * Calculate the oldest active split generation. 
- */ -static uint64_t -__split_oldest_gen(WT_SESSION_IMPL *session) -{ - WT_CONNECTION_IMPL *conn; - WT_SESSION_IMPL *s; - uint64_t gen, oldest; - u_int i, session_cnt; - - conn = S2C(session); - WT_ORDERED_READ(session_cnt, conn->session_cnt); - for (i = 0, s = conn->sessions, oldest = conn->split_gen + 1; - i < session_cnt; - i++, s++) - if (((gen = s->split_gen) != 0) && gen < oldest) - oldest = gen; - - return (oldest); -} - -/* - * __wt_split_obsolete -- - * Check if it is safe to free / evict based on split generation. - */ -bool -__wt_split_obsolete(WT_SESSION_IMPL *session, uint64_t split_gen) -{ - return (split_gen < __split_oldest_gen(session)); -} - -/* - * __split_stash_add -- - * Add a new entry into the session's split stash list. - */ -static int -__split_stash_add( - WT_SESSION_IMPL *session, uint64_t split_gen, void *p, size_t len) -{ - WT_CONNECTION_IMPL *conn; - WT_SPLIT_STASH *stash; - - WT_ASSERT(session, p != NULL); - - conn = S2C(session); - - /* Grow the list as necessary. */ - WT_RET(__wt_realloc_def(session, &session->split_stash_alloc, - session->split_stash_cnt + 1, &session->split_stash)); - - stash = session->split_stash + session->split_stash_cnt++; - stash->split_gen = split_gen; - stash->p = p; - stash->len = len; - - (void)__wt_atomic_add64(&conn->split_stashed_bytes, len); - (void)__wt_atomic_add64(&conn->split_stashed_objects, 1); - - /* See if we can free any previous entries. */ - if (session->split_stash_cnt > 1) - __wt_split_stash_discard(session); - - return (0); -} - -/* - * __wt_split_stash_discard -- - * Discard any memory from a session's split stash that we can. - */ -void -__wt_split_stash_discard(WT_SESSION_IMPL *session) -{ - WT_CONNECTION_IMPL *conn; - WT_SPLIT_STASH *stash; - uint64_t oldest; - size_t i; - - conn = S2C(session); - - /* Get the oldest split generation. 
*/ - oldest = __split_oldest_gen(session); - - for (i = 0, stash = session->split_stash; - i < session->split_stash_cnt; - ++i, ++stash) { - if (stash->p == NULL) - continue; - if (stash->split_gen >= oldest) - break; - /* - * It's a bad thing if another thread is in this memory after - * we free it, make sure nothing good happens to that thread. - */ - (void)__wt_atomic_sub64(&conn->split_stashed_bytes, stash->len); - (void)__wt_atomic_sub64(&conn->split_stashed_objects, 1); - __wt_overwrite_and_free_len(session, stash->p, stash->len); - } - - /* - * If there are enough free slots at the beginning of the list, shuffle - * everything down. - */ - if (i > 100 || i == session->split_stash_cnt) - if ((session->split_stash_cnt -= i) > 0) - memmove(session->split_stash, stash, - session->split_stash_cnt * sizeof(*stash)); -} - -/* - * __wt_split_stash_discard_all -- - * Discard all memory from a session's split stash. - */ -void -__wt_split_stash_discard_all( - WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session) -{ - WT_SPLIT_STASH *stash; - size_t i; - - /* - * This function is called during WT_CONNECTION.close to discard any - * memory that remains. For that reason, we take two WT_SESSION_IMPL - * arguments: session_safe is still linked to the WT_CONNECTION and - * can be safely used for calls to other WiredTiger functions, while - * session is the WT_SESSION_IMPL we're cleaning up. - */ - for (i = 0, stash = session->split_stash; - i < session->split_stash_cnt; - ++i, ++stash) - __wt_free(session_safe, stash->p); - - __wt_free(session_safe, session->split_stash); - session->split_stash_cnt = session->split_stash_alloc = 0; -} - /* * __split_safe_free -- * Free a buffer if we can be sure no thread is accessing it, or schedule @@ -177,13 +40,14 @@ __split_safe_free(WT_SESSION_IMPL *session, uint64_t split_gen, bool exclusive, void *p, size_t s) { /* We should only call safe free if we aren't pinning the memory. 
*/ - WT_ASSERT(session, session->split_gen != split_gen); + WT_ASSERT(session, + __wt_session_gen(session, WT_GEN_SPLIT) != split_gen); /* * We have swapped something in a page: if we don't have exclusive * access, check whether there are other threads in the same tree. */ - if (!exclusive && __split_oldest_gen(session) > split_gen) + if (!exclusive && __wt_gen_oldest(session, WT_GEN_SPLIT) > split_gen) exclusive = true; if (exclusive) { @@ -191,7 +55,7 @@ __split_safe_free(WT_SESSION_IMPL *session, return (0); } - return (__split_stash_add(session, split_gen, p, s)); + return (__wt_stash_add(session, WT_GEN_SPLIT, split_gen, p, s)); } #ifdef HAVE_DIAGNOSTIC @@ -645,7 +509,8 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) * generation to block splits in newly created pages, so get one. */ WT_ENTER_PAGE_INDEX(session); - __split_ref_prepare(session, alloc_index, session->split_gen, false); + __split_ref_prepare(session, alloc_index, + __wt_session_gen(session, WT_GEN_SPLIT), false); /* * Confirm the root page's index hasn't moved, then update it, which @@ -662,7 +527,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) * after the new index is swapped into place in order to know that no * readers are looking at the old index. */ - split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); + split_gen = __wt_gen_next(session, WT_GEN_SPLIT); root->pg_intl_split_gen = split_gen; #ifdef HAVE_DIAGNOSTIC @@ -848,7 +713,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, * the new index is swapped into place in order to know that no readers * are looking at the old index. */ - split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); + split_gen = __wt_gen_next(session, WT_GEN_SPLIT); parent->pg_intl_split_gen = split_gen; /* @@ -1173,7 +1038,8 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) * generation to block splits in newly created pages, so get one. 
*/ WT_ENTER_PAGE_INDEX(session); - __split_ref_prepare(session, alloc_index, session->split_gen, true); + __split_ref_prepare(session, alloc_index, + __wt_session_gen(session, WT_GEN_SPLIT), true); /* Split into the parent. */ if ((ret = __split_parent(session, page_ref, alloc_index->index, @@ -1194,7 +1060,7 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) * after the new index is swapped into place in order to know that no * readers are looking at the old index. */ - split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1); + split_gen = __wt_gen_next(session, WT_GEN_SPLIT); page->pg_intl_split_gen = split_gen; #ifdef HAVE_DIAGNOSTIC diff --git a/src/config/config_api.c b/src/config/config_api.c index c1299baaafe..c3e590a171a 100644 --- a/src/config/config_api.c +++ b/src/config/config_api.c @@ -158,11 +158,11 @@ wiredtiger_config_validate(WT_SESSION *wt_session, } /* - * __wt_conn_foc_add -- + * __conn_foc_add -- * Add a new entry into the connection's free-on-close list. */ -void -__wt_conn_foc_add(WT_SESSION_IMPL *session, const void *p) +static void +__conn_foc_add(WT_SESSION_IMPL *session, const void *p) { WT_CONNECTION_IMPL *conn; @@ -327,12 +327,12 @@ __wt_configure_method(WT_SESSION_IMPL *session, * order to avoid freeing chunks of memory twice. Again, this isn't a * commonly used API and it shouldn't ever happen, just leak it. 
*/ - __wt_conn_foc_add(session, entry->base); - __wt_conn_foc_add(session, entry); - __wt_conn_foc_add(session, checks); - __wt_conn_foc_add(session, newcheck->type); - __wt_conn_foc_add(session, newcheck->checks); - __wt_conn_foc_add(session, newcheck_name); + __conn_foc_add(session, entry->base); + __conn_foc_add(session, entry); + __conn_foc_add(session, checks); + __conn_foc_add(session, newcheck->type); + __conn_foc_add(session, newcheck->checks); + __conn_foc_add(session, newcheck_name); /* * Instead of using locks to protect configuration information, assume diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index 287e9ca7b99..1266029671e 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -81,15 +81,8 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_RET(__wt_cond_alloc( session, "LSM worker cond", &conn->lsm_manager.work_cond)); - /* - * Generation numbers. - * - * Start split generations at one. Threads publish this generation - * number before examining tree structures, and zero when they leave. - * We need to distinguish between threads that are in a tree before the - * first split has happened, and threads that are not in a tree. - */ - conn->split_gen = 1; + /* Initialize the generation manager. */ + __wt_gen_init(session); /* * Block manager. 
diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c index eb3c79422a0..2ec1002331a 100644 --- a/src/conn/conn_open.c +++ b/src/conn/conn_open.c @@ -192,7 +192,7 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) for (i = 0; i < conn->session_size; ++s, ++i) { __wt_free(session, s->dhhash); __wt_free(session, s->tablehash); - __wt_split_stash_discard_all(session, s); + __wt_stash_discard_all(session, s); __wt_free(session, s->hazard); } diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c index d89392b66c6..1247fae84ac 100644 --- a/src/conn/conn_stat.c +++ b/src/conn/conn_stat.c @@ -83,9 +83,9 @@ __wt_conn_stat_init(WT_SESSION_IMPL *session) stats, session_cursor_open, conn->open_cursor_count); WT_STAT_SET(session, stats, dh_conn_handle_count, conn->dhandle_count); WT_STAT_SET(session, - stats, rec_split_stashed_objects, conn->split_stashed_objects); + stats, rec_split_stashed_objects, conn->stashed_objects); WT_STAT_SET(session, - stats, rec_split_stashed_bytes, conn->split_stashed_bytes); + stats, rec_split_stashed_bytes, conn->stashed_bytes); } /* diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 3ce35c60f2e..47eb20acd6f 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1468,7 +1468,8 @@ retry: while (slot < max_entries) { ret = __evict_walk_file( session, queue, max_entries, &slot)); - WT_ASSERT(session, session->split_gen == 0); + WT_ASSERT(session, __wt_session_gen( + session, WT_GEN_SPLIT) == 0); } __wt_spin_unlock(session, &cache->evict_walk_lock); WT_ERR(ret); diff --git a/src/include/btmem.h b/src/include/btmem.h index f1bb08d2699..30243d2ae18 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -507,7 +507,8 @@ struct __wt_page { #define WT_INTL_INDEX_GET_SAFE(page) \ ((page)->u.intl.__index) #define WT_INTL_INDEX_GET(session, page, pindex) do { \ - WT_ASSERT(session, (session)->split_gen != 0); \ + WT_ASSERT(session, \ + __wt_session_gen(session, WT_GEN_SPLIT) != 0); \ (pindex) = 
WT_INTL_INDEX_GET_SAFE(page); \ } while (0) #define WT_INTL_INDEX_SET(page, v) do { \ @@ -1097,22 +1098,16 @@ struct __wt_insert_head { * already have a split generation, leave it alone. If our caller is examining * an index, we don't want the oldest split generation to move forward and * potentially free it. - * - * Check that we haven't raced with a split_gen update after publishing: we - * rely on the published value not being missed when scanning for the oldest - * active split_gen. */ #define WT_ENTER_PAGE_INDEX(session) do { \ - uint64_t __prev_split_gen = (session)->split_gen; \ + uint64_t __prev_split_gen = \ + __wt_session_gen(session, WT_GEN_SPLIT); \ if (__prev_split_gen == 0) \ - do { \ - WT_PUBLISH((session)->split_gen, \ - S2C(session)->split_gen); \ - } while ((session)->split_gen != S2C(session)->split_gen) + __wt_session_gen_enter(session, WT_GEN_SPLIT); #define WT_LEAVE_PAGE_INDEX(session) \ if (__prev_split_gen == 0) \ - (session)->split_gen = 0; \ + __wt_session_gen_leave(session, WT_GEN_SPLIT); \ } while (0) #define WT_WITH_PAGE_INDEX(session, e) \ diff --git a/src/include/btree.i b/src/include/btree.i index a4d88d5fda1..dcc73f51f1b 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1355,8 +1355,8 @@ __wt_page_can_evict( * discards its WT_REF array, and a thread traversing the original * parent page index might see a freed WT_REF. 
*/ - if (WT_PAGE_IS_INTERNAL(page) && !__wt_split_obsolete( - session, page->pg_intl_split_gen)) + if (WT_PAGE_IS_INTERNAL(page) && + page->pg_intl_split_gen >= __wt_gen_oldest(session, WT_GEN_SPLIT)) return (false); /* diff --git a/src/include/connection.h b/src/include/connection.h index 6c23492e926..7e4ef212b82 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -210,10 +210,6 @@ struct __wt_connection_impl { WT_FH *lock_fh; /* Lock file handle */ - volatile uint64_t split_gen; /* Generation number for splits */ - uint64_t split_stashed_bytes; /* Atomic: split statistics */ - uint64_t split_stashed_objects; - /* * The connection keeps a cache of data handles. The set of handles * can grow quite large so we maintain both a simple list and a hash @@ -401,7 +397,10 @@ struct __wt_connection_impl { /* If non-zero, all buffers used for I/O will be aligned to this. */ size_t buffer_alignment; - uint32_t schema_gen; /* Schema generation number */ + uint64_t stashed_bytes; /* Atomic: stashed memory statistics */ + uint64_t stashed_objects; + /* Generations manager */ + volatile uint64_t generations[WT_GENERATIONS]; wt_off_t data_extend_len; /* file_extend data length */ wt_off_t log_extend_len; /* file_extend log length */ diff --git a/src/include/extern.h b/src/include/extern.h index 55ba1bada7c..b91190a4442 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -165,9 +165,6 @@ extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC extern int __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern bool __wt_split_obsolete(WT_SESSION_IMPL *session, uint64_t split_gen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_split_stash_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_split_stash_discard_all( WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -222,7 +219,6 @@ extern int __wt_config_getones_none(WT_SESSION_IMPL *session, const char *config extern int __wt_config_gets_def(WT_SESSION_IMPL *session, const char **cfg, const char *key, int def, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_config_subgetraw(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_config_subgets(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_conn_foc_add(WT_SESSION_IMPL *session, const void *p) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void 
__wt_conn_foc_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_configure_method(WT_SESSION_IMPL *session, const char *method, const char *uri, const char *config, const char *type, const char *check) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_config_check(WT_SESSION_IMPL *session, const WT_CONFIG_ENTRY *entry, const char *config, size_t config_len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -647,6 +643,16 @@ extern int __wt_illegal_value(WT_SESSION_IMPL *session, const char *name) WT_GCC extern int __wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_unexpected_object_type( WT_SESSION_IMPL *session, const char *uri, const char *expect) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_gen_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern uint64_t __wt_gen(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern uint64_t __wt_gen_next(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern uint64_t __wt_gen_oldest(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern uint64_t __wt_session_gen(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_session_gen_enter(WT_SESSION_IMPL *session, int 
which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_session_gen_leave(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_stash_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_stash_add(WT_SESSION_IMPL *session, int which, uint64_t generation, void *p, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_stash_discard_all(WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_library_init(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_breakpoint(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_attach(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/schema.h b/src/include/schema.h index 50e141d9921..1f12116e39f 100644 --- a/src/include/schema.h +++ b/src/include/schema.h @@ -68,8 +68,8 @@ struct __wt_table { bool cg_complete, idx_complete, is_simple; u_int ncolgroups, nindices, nkey_columns; - uint32_t refcnt; /* Number of open cursors */ - uint32_t schema_gen; /* Cached schema generation number */ + uint32_t refcnt; /* Number of open cursors */ + uint64_t schema_gen; /* Cached schema generation number */ }; /* diff --git a/src/include/session.h b/src/include/session.h index 674e92671b1..fca67fdbde6 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -167,25 +167,31 @@ struct __wt_session_impl { /* Hashed table reference list array */ TAILQ_HEAD(__tables_hash, __wt_table) *tablehash; + /* Generations manager */ +#define WT_GEN_CHECKPOINT 0 /* Checkpoint generation */ +#define WT_GEN_HAZARD 1 /* Hazard pointer */ +#define WT_GEN_SCHEMA 2 /* Schema version */ +#define WT_GEN_SPLIT 3 /* Page splits */ +#define 
WT_GENERATIONS 4 /* Total generation manager entries */ + volatile uint64_t generations[WT_GENERATIONS]; + /* - * Split stash memory persists past session close because it's accessed - * by threads of control other than the thread owning the session. - * - * Splits can "free" memory that may still be in use, and we use a - * split generation number to track it, that is, the session stores a - * reference to the memory and allocates a split generation; when no - * session is reading from that split generation, the memory can be - * freed for real. + * Session memory persists past session close because it's accessed by + * threads of control other than the thread owning the session. For + * example, btree splits and hazard pointers can "free" memory that's + * still in use. In order to eventually free it, it's stashed here + * with its generation number; when no thread is reading in that generation, + * the memory can be freed for real. */ - struct __wt_split_stash { - uint64_t split_gen; /* Split generation */ - void *p; /* Memory, length */ - size_t len; - } *split_stash; /* Split stash array */ - size_t split_stash_cnt; /* Array entries */ - size_t split_stash_alloc; /* Allocated bytes */ - - uint64_t split_gen; /* Reading split generation */ + struct __wt_session_stash { + struct __wt_stash { + void *p; /* Memory, length */ + size_t len; + uint64_t gen; /* Generation */ + } *list; + size_t cnt; /* Array entries */ + size_t alloc; /* Allocated bytes */ + } stash[WT_GENERATIONS]; /* * Hazard pointers.
diff --git a/src/include/txn.h b/src/include/txn.h index 7e802c188ab..01d94e2f731 100644 --- a/src/include/txn.h +++ b/src/include/txn.h @@ -107,7 +107,6 @@ struct __wt_txn_global { */ volatile bool checkpoint_running; /* Checkpoint running */ volatile uint32_t checkpoint_id; /* Checkpoint's session ID */ - volatile uint64_t checkpoint_gen; /* Checkpoint generation */ volatile uint64_t checkpoint_pinned; /* Oldest ID for checkpoint */ volatile uint64_t checkpoint_txnid; /* Checkpoint's txn ID */ diff --git a/src/include/txn.i b/src/include/txn.i index 314c948e4d1..8c4cb4a8ab8 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -126,7 +126,7 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session) */ oldest_id = txn_global->oldest_id; include_checkpoint_txn = btree == NULL || - btree->checkpoint_gen != txn_global->checkpoint_gen; + btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT); WT_READ_BARRIER(); checkpoint_pinned = txn_global->checkpoint_pinned; diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h index da318ad8a86..27f732c6539 100644 --- a/src/include/wt_internal.h +++ b/src/include/wt_internal.h @@ -276,12 +276,14 @@ struct __wt_scratch_track; typedef struct __wt_scratch_track WT_SCRATCH_TRACK; struct __wt_session_impl; typedef struct __wt_session_impl WT_SESSION_IMPL; +struct __wt_session_stash; + typedef struct __wt_session_stash WT_SESSION_STASH; struct __wt_size; typedef struct __wt_size WT_SIZE; struct __wt_spinlock; typedef struct __wt_spinlock WT_SPINLOCK; -struct __wt_split_stash; - typedef struct __wt_split_stash WT_SPLIT_STASH; +struct __wt_stash; + typedef struct __wt_stash WT_STASH; struct __wt_table; typedef struct __wt_table WT_TABLE; struct __wt_thread; diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 6f95b84d292..7080e308007 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -526,10 +526,8 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, static inline bool 
__rec_las_checkpoint_test(WT_SESSION_IMPL *session, WT_RECONCILE *r) { - WT_CONNECTION_IMPL *conn; WT_BTREE *btree; - conn = S2C(session); btree = S2BT(session); /* @@ -550,7 +548,8 @@ __rec_las_checkpoint_test(WT_SESSION_IMPL *session, WT_RECONCILE *r) if (F_ISSET(btree, WT_BTREE_NO_CHECKPOINT)) return (false); if (r->orig_btree_checkpoint_gen == btree->checkpoint_gen && - r->orig_txn_checkpoint_gen == conn->txn_global.checkpoint_gen && + r->orig_txn_checkpoint_gen == + __wt_gen(session, WT_GEN_CHECKPOINT) && r->orig_btree_checkpoint_gen == r->orig_txn_checkpoint_gen) return (false); return (true); @@ -810,12 +809,10 @@ __rec_write_init(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags, WT_SALVAGE_COOKIE *salvage, void *reconcilep) { WT_BTREE *btree; - WT_CONNECTION_IMPL *conn; WT_PAGE *page; WT_RECONCILE *r; btree = S2BT(session); - conn = S2C(session); page = ref->page; if ((r = *(WT_RECONCILE **)reconcilep) == NULL) { @@ -845,7 +842,7 @@ __rec_write_init(WT_SESSION_IMPL *session, * These are all ordered reads, but we only need one. */ r->orig_btree_checkpoint_gen = btree->checkpoint_gen; - r->orig_txn_checkpoint_gen = conn->txn_global.checkpoint_gen; + r->orig_txn_checkpoint_gen = __wt_gen(session, WT_GEN_CHECKPOINT); WT_ORDERED_READ(r->orig_write_gen, page->modify->write_gen); /* diff --git a/src/schema/schema_alter.c b/src/schema/schema_alter.c index 26d800aa98e..b864c5d8b05 100644 --- a/src/schema/schema_alter.c +++ b/src/schema/schema_alter.c @@ -172,7 +172,7 @@ __wt_schema_alter(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) ret = ENOENT; /* Bump the schema generation so that stale data is ignored. 
*/ - ++S2C(session)->schema_gen; + (void)__wt_gen_next(session, WT_GEN_SCHEMA); WT_TRET(__wt_meta_track_off(session, true, ret != 0)); diff --git a/src/schema/schema_drop.c b/src/schema/schema_drop.c index 49801e4e5f9..55f92a64eee 100644 --- a/src/schema/schema_drop.c +++ b/src/schema/schema_drop.c @@ -201,7 +201,7 @@ __wt_schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) ret = force ? 0 : ENOENT; /* Bump the schema generation so that stale data is ignored. */ - ++S2C(session)->schema_gen; + (void)__wt_gen_next(session, WT_GEN_SCHEMA); WT_TRET(__wt_meta_track_off(session, true, ret != 0)); diff --git a/src/schema/schema_list.c b/src/schema/schema_list.c index 74ef5135a4a..6060cfb6aca 100644 --- a/src/schema/schema_list.c +++ b/src/schema/schema_list.c @@ -66,7 +66,8 @@ restart: * between checking the generation and opening the * first column group. */ - if (table->schema_gen != S2C(session)->schema_gen) { + if (table->schema_gen != + __wt_gen(session, WT_GEN_SCHEMA)) { if (table->refcnt == 0) { WT_RET(__wt_schema_remove_table( session, table)); diff --git a/src/schema/schema_open.c b/src/schema/schema_open.c index 44bd66e011a..c9f2baa81b8 100644 --- a/src/schema/schema_open.c +++ b/src/schema/schema_open.c @@ -502,7 +502,7 @@ __schema_open_table(WT_SESSION_IMPL *session, table->name); /* Copy the schema generation into the new table. */ - table->schema_gen = S2C(session)->schema_gen; + table->schema_gen = __wt_gen(session, WT_GEN_SCHEMA); *tablep = table; diff --git a/src/schema/schema_rename.c b/src/schema/schema_rename.c index a374f4c2831..14b5dc7afe7 100644 --- a/src/schema/schema_rename.c +++ b/src/schema/schema_rename.c @@ -277,7 +277,7 @@ __wt_schema_rename(WT_SESSION_IMPL *session, ret = __wt_bad_object_type(session, uri); /* Bump the schema generation so that stale data is ignored. 
*/ - ++S2C(session)->schema_gen; + (void)__wt_gen_next(session, WT_GEN_SCHEMA); WT_TRET(__wt_meta_track_off(session, true, ret != 0)); diff --git a/src/session/session_api.c b/src/session/session_api.c index b7daf0e2e02..5429d95a11b 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -99,6 +99,9 @@ __wt_session_release_resources(WT_SESSION_IMPL *session) if (session->reconcile_cleanup != NULL) WT_TRET(session->reconcile_cleanup(session)); + /* Stashed memory. */ + __wt_stash_discard(session); + /* * Discard scratch buffers, error memory; last, just in case a cleanup * routine uses scratch buffers. diff --git a/src/session/session_compact.c b/src/session/session_compact.c index 72c072e0fb8..efbf18bcb14 100644 --- a/src/session/session_compact.c +++ b/src/session/session_compact.c @@ -225,10 +225,10 @@ __compact_checkpoint(WT_SESSION_IMPL *session) * generation number changes, the checkpoint blocking us has completed. */ txn_global = &S2C(session)->txn_global; - for (txn_gen = txn_global->checkpoint_gen;;) { + for (txn_gen = __wt_gen(session, WT_GEN_CHECKPOINT);;) { WT_READ_BARRIER(); if (!txn_global->checkpoint_running || - txn_gen != txn_global->checkpoint_gen) + txn_gen != __wt_gen(session, WT_GEN_CHECKPOINT)) break; WT_RET(__wt_session_compact_check_timeout(session)); diff --git a/src/support/generation.c b/src/support/generation.c new file mode 100644 index 00000000000..3081a3fa0f1 --- /dev/null +++ b/src/support/generation.c @@ -0,0 +1,346 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * WiredTiger uses generations to manage various resources. Threads publish + * a current generation before accessing a resource, and clear it when they are + * done.
For example, a thread wanting to replace an object in memory replaces + * the object and increments the object's generation. Once no threads have the + * previous generation published, it is safe to discard the previous version of + * the object. + */ + +/* + * __wt_gen_init -- + * Initialize the connection's generations. + */ +void +__wt_gen_init(WT_SESSION_IMPL *session) +{ + int i; + + /* + * All generations start at 1, a session with a generation of 0 isn't + * using the resource. + */ + for (i = 0; i < WT_GENERATIONS; ++i) + S2C(session)->generations[i] = 1; + + /* Ensure threads see the state change. */ + WT_WRITE_BARRIER(); +} + +/* + * __wt_gen -- + * Return the resource's generation. + */ +uint64_t +__wt_gen(WT_SESSION_IMPL *session, int which) +{ + return (S2C(session)->generations[which]); +} + +/* + * __wt_gen_next -- + * Switch the resource to its next generation. + */ +uint64_t +__wt_gen_next(WT_SESSION_IMPL *session, int which) +{ + return (__wt_atomic_addv64(&S2C(session)->generations[which], 1)); +} + +#if 0 +/* + * __wt_gen_next_drain -- + * Switch the resource to its next generation, then wait for it to drain. + */ +uint64_t + TABBED IN to avoid dist/ functions: + __wt_gen_next_drain(WT_SESSION_IMPL *session, int which) +{ + uint64_t v; + + v = __wt_atomic_addv64(&S2C(session)->generations[which], 1); + + __wt_gen_drain(session, which, v); + + return (v); +} + +/* + * __wt_gen_drain -- + * Wait for the resource to drain. + */ +void + TABBED IN to avoid dist/ functions: + __wt_gen_drain(WT_SESSION_IMPL *session, int which, uint64_t generation) +{ + WT_CONNECTION_IMPL *conn; + WT_SESSION_IMPL *s; + uint64_t v; + uint32_t i, session_cnt; + int pause_cnt; + + conn = S2C(session); + + /* + * No lock is required because the session array is fixed size, but it + * may contain inactive entries. We must review any active session, so + * insert a read barrier after reading the active session count. 
That + * way, no matter what sessions come or go, we'll check the slots for + * all of the sessions that could have been active when we started our + * check. + */ + WT_ORDERED_READ(session_cnt, conn->session_cnt); + for (pause_cnt = 0, + s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) { + if (!s->active) + continue; + + for (;;) { + /* Ensure we only read the value once. */ + WT_ORDERED_READ(v, s->generations[which]); + + if (v == 0 || generation <= v) + break; + + /* + * The pause count is cumulative, quit spinning if it's + * not doing us any good, that can happen in generations + * that don't move quickly. + */ + if (++pause_cnt < WT_THOUSAND) + WT_PAUSE(); + else + __wt_sleep(0, 10); + } + } +} +#endif + +/* + * __wt_gen_oldest -- + * Return the oldest generation in use for the resource. + */ +uint64_t +__wt_gen_oldest(WT_SESSION_IMPL *session, int which) +{ + WT_CONNECTION_IMPL *conn; + WT_SESSION_IMPL *s; + uint64_t oldest, v; + uint32_t i, session_cnt; + + conn = S2C(session); + + /* + * No lock is required because the session array is fixed size, but it + * may contain inactive entries. We must review any active session, so + * insert a read barrier after reading the active session count. That + * way, no matter what sessions come or go, we'll check the slots for + * all of the sessions that could have been active when we started our + * check. + */ + WT_ORDERED_READ(session_cnt, conn->session_cnt); + for (oldest = conn->generations[which] + 1, + s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) { + if (!s->active) + continue; + + /* Ensure we only read the value once. */ + WT_ORDERED_READ(v, s->generations[which]); + if (v != 0 && v < oldest) + oldest = v; + } + + return (oldest); +} + +/* + * __wt_session_gen -- + * Return the thread's resource generation. 
+ */ +uint64_t +__wt_session_gen(WT_SESSION_IMPL *session, int which) +{ + return (session->generations[which]); +} + +/* + * __wt_session_gen_enter -- + * Publish a thread's resource generation. + */ +void +__wt_session_gen_enter(WT_SESSION_IMPL *session, int which) +{ + /* + * Assign the thread's resource generation and publish it, ensuring + * threads waiting on a resource to drain see the new value. Check we + * haven't raced with a generation update after publishing, we rely on + * the published value not being missed when scanning for the oldest + * generation. + */ + do { + session->generations[which] = __wt_gen(session, which); + WT_WRITE_BARRIER(); + } while (session->generations[which] != __wt_gen(session, which)); +} + +/* + * __wt_session_gen_leave -- + * Leave a thread's resource generation. + */ +void +__wt_session_gen_leave(WT_SESSION_IMPL *session, int which) +{ + /* Ensure writes made by this thread are visible. */ + WT_PUBLISH(session->generations[which], 0); + + /* Let threads waiting for the resource to drain proceed quickly. */ + WT_FULL_BARRIER(); +} + +/* + * __stash_discard -- + * Discard any memory from a session stash that we can. + */ +static void +__stash_discard(WT_SESSION_IMPL *session, int which) +{ + WT_CONNECTION_IMPL *conn; + WT_SESSION_STASH *session_stash; + WT_STASH *stash; + uint64_t oldest; + size_t i; + + conn = S2C(session); + session_stash = &session->stash[which]; + + /* Get the resource's oldest generation. */ + oldest = __wt_gen_oldest(session, which); + + for (i = 0, + stash = session_stash->list; i < session_stash->cnt; ++i, ++stash) { + if (stash->p == NULL) + continue; + /* + * The list is expected to be in generation-sorted order, quit + * as soon as we find an object we can't discard.
+ */ + if (stash->gen >= oldest) + break; + + (void)__wt_atomic_sub64(&conn->stashed_bytes, stash->len); + (void)__wt_atomic_sub64(&conn->stashed_objects, 1); + + /* + * It's a bad thing if another thread is in this memory after + * we free it, make sure nothing good happens to that thread. + */ + __wt_overwrite_and_free_len(session, stash->p, stash->len); + } + + /* + * If there are enough free slots at the beginning of the list, shuffle + * everything down. + */ + if (i > 100 || i == session_stash->cnt) + if ((session_stash->cnt -= i) > 0) + memmove(session_stash->list, stash, + session_stash->cnt * sizeof(*stash)); +} + +/* + * __wt_stash_discard -- + * Discard any memory from a session stash that we can. + */ +void +__wt_stash_discard(WT_SESSION_IMPL *session) +{ + WT_SESSION_STASH *session_stash; + int which; + + for (which = 0; which < WT_GENERATIONS; ++which) { + session_stash = &session->stash[which]; + if (session_stash->cnt >= 1) + __stash_discard(session, which); + } +} + +/* + * __wt_stash_add -- + * Add a new entry into a session stash list. + */ +int +__wt_stash_add(WT_SESSION_IMPL *session, + int which, uint64_t generation, void *p, size_t len) +{ + WT_CONNECTION_IMPL *conn; + WT_SESSION_STASH *session_stash; + WT_STASH *stash; + + conn = S2C(session); + session_stash = &session->stash[which]; + + /* Grow the list as necessary. */ + WT_RET(__wt_realloc_def(session, &session_stash->alloc, + session_stash->cnt + 1, &session_stash->list)); + + /* + * If no caller stashes memory with a lower generation than a previously + * stashed object, the list is in generation-sorted order and discarding + * can be faster. (An error won't cause problems other than we might not + * discard stashed objects as soon as we otherwise would have.) 
+ */ + stash = session_stash->list + session_stash->cnt++; + stash->p = p; + stash->len = len; + stash->gen = generation; + + (void)__wt_atomic_add64(&conn->stashed_bytes, len); + (void)__wt_atomic_add64(&conn->stashed_objects, 1); + + /* See if we can free any previous entries. */ + if (session_stash->cnt > 1) + __stash_discard(session, which); + + return (0); +} + +/* + * __wt_stash_discard_all -- + * Discard all memory from a session's stash. + */ +void +__wt_stash_discard_all(WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session) +{ + WT_SESSION_STASH *session_stash; + WT_STASH *stash; + int which; + size_t i; + + /* + * This function is called during WT_CONNECTION.close to discard any + * memory that remains. For that reason, we take two WT_SESSION_IMPL + * arguments: session_safe is still linked to the WT_CONNECTION and + * can be safely used for calls to other WiredTiger functions, while + * session is the WT_SESSION_IMPL we're cleaning up. + */ + for (which = 0; which < WT_GENERATIONS; ++which) { + session_stash = &session->stash[which]; + + for (i = 0, stash = session_stash->list; + i < session_stash->cnt; ++i, ++stash) + __wt_free(session_safe, stash->p); + + __wt_free(session_safe, session_stash->list); + session_stash->cnt = session_stash->alloc = 0; + } +} diff --git a/src/support/hazard.c b/src/support/hazard.c index 7e88ad183fe..5f93ce45ba1 100644 --- a/src/support/hazard.c +++ b/src/support/hazard.c @@ -22,6 +22,7 @@ hazard_grow(WT_SESSION_IMPL *session) WT_HAZARD *nhazard; size_t size; void *ohazard; + uint64_t hazard_gen; /* * Allocate a new, larger hazard pointer array and copy the contents of @@ -40,10 +41,6 @@ hazard_grow(WT_SESSION_IMPL *session) ohazard = session->hazard; WT_PUBLISH(session->hazard, nhazard); - __wt_spin_lock(session, &S2C(session)->api_lock); - __wt_conn_foc_add(session, ohazard); - __wt_spin_unlock(session, &S2C(session)->api_lock); - /* * Increase the size of the session's pointer array after swapping it * into place 
(the session's reference must be updated before eviction @@ -51,6 +48,15 @@ hazard_grow(WT_SESSION_IMPL *session) */ WT_PUBLISH(session->hazard_size, (uint32_t)(size * 2)); + /* + * Threads using the hazard pointer array from now on will use the new + * one. Increment the hazard pointer generation number, and schedule a + * future free of the old memory. Ignore any failure, leak the memory. + */ + hazard_gen = __wt_gen_next(session, WT_GEN_HAZARD); + WT_IGNORE_RET( + __wt_stash_add(session, WT_GEN_HAZARD, hazard_gen, ohazard, 0)); + return (0); } @@ -324,6 +330,13 @@ __wt_hazard_check(WT_SESSION_IMPL *session, WT_REF *ref) WT_STAT_CONN_INCR(session, cache_hazard_checks); + /* + * Hazard pointer arrays might grow and be freed underneath us; enter + * the current hazard resource generation for the duration of the walk + * to ensure that doesn't happen. + */ + __wt_session_gen_enter(session, WT_GEN_HAZARD); + /* * No lock is required because the session array is fixed size, but it * may contain inactive entries. We must review any active session @@ -350,12 +363,17 @@ __wt_hazard_check(WT_SESSION_IMPL *session, WT_REF *ref) if (hp->ref == ref) { WT_STAT_CONN_INCRV(session, cache_hazard_walks, walk_cnt); - return (hp); + goto done; } } } WT_STAT_CONN_INCRV(session, cache_hazard_walks, walk_cnt); - return (NULL); + hp = NULL; + +done: /* Leave the current resource generation. */ + __wt_session_gen_leave(session, WT_GEN_HAZARD); + + return (hp); } /* diff --git a/src/txn/txn.c b/src/txn/txn.c index 6eebf5ecf9f..c48053c82af 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -477,10 +477,9 @@ __wt_txn_release(WT_SESSION_IMPL *session) /* Free the scratch buffer allocated for logging. */ __wt_logrec_free(session, &txn->logrec); - /* Discard any memory from the session's split stash that we can. */ - WT_ASSERT(session, session->split_gen == 0); - if (session->split_stash_cnt > 0) - __wt_split_stash_discard(session); + /* Discard any memory from the session's stash that we can. 
*/ + WT_ASSERT(session, __wt_session_gen(session, WT_GEN_SPLIT) == 0); + __wt_stash_discard(session); /* * Reset the transaction state to not running and release the snapshot. @@ -836,7 +835,8 @@ __wt_verbose_dump_txn(WT_SESSION_IMPL *session) WT_RET(__wt_msg(session, "checkpoint running? %s", txn_global->checkpoint_running ? "yes" : "no")); WT_RET(__wt_msg(session, - "checkpoint generation: %" PRIu64, txn_global->checkpoint_gen)); + "checkpoint generation: %" PRIu64, + __wt_gen(session, WT_GEN_CHECKPOINT))); WT_RET(__wt_msg(session, "checkpoint pinned ID: %" PRIu64, txn_global->checkpoint_pinned)); WT_RET(__wt_msg(session, diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index f4ccf5eacd0..cca5cdcb52d 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -110,8 +110,7 @@ __checkpoint_update_generation(WT_SESSION_IMPL *session) if (WT_IS_METADATA(session->dhandle)) return; - WT_PUBLISH(btree->checkpoint_gen, - S2C(session)->txn_global.checkpoint_gen); + WT_PUBLISH(btree->checkpoint_gen, __wt_gen(session, WT_GEN_CHECKPOINT)); WT_STAT_DATA_SET(session, btree_checkpoint_generation, btree->checkpoint_gen); } @@ -533,7 +532,7 @@ __checkpoint_verbose_track(WT_SESSION_IMPL *session, __wt_verbose(session, WT_VERB_CHECKPOINT, "time: %" PRIu64 " us, gen: %" PRIu64 ": Full database checkpoint %s", - msec, S2C(session)->txn_global.checkpoint_gen, msg); + msec, __wt_gen(session, WT_GEN_CHECKPOINT), msg); /* Update the timestamp so we are reporting intervals. */ memcpy(start, &stop, sizeof(*start)); @@ -667,7 +666,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_TXN_ISOLATION saved_isolation; void *saved_meta_next; u_int i; - uint64_t fsync_duration_usecs; + uint64_t fsync_duration_usecs, generation; bool failed, full, idle, logging, tracking; conn = S2C(session); @@ -733,9 +732,8 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) * of the transaction table, or a thread evicting in a tree could * ignore the checkpoint's transaction. 
*/ - (void)__wt_atomic_addv64(&txn_global->checkpoint_gen, 1); - WT_STAT_CONN_SET(session, - txn_checkpoint_generation, txn_global->checkpoint_gen); + generation = __wt_gen_next(session, WT_GEN_CHECKPOINT); + WT_STAT_CONN_SET(session, txn_checkpoint_generation, generation); /* Keep track of handles acquired for locking. */ WT_ERR(__wt_meta_track_on(session)); @@ -1549,8 +1547,8 @@ __checkpoint_presync(WT_SESSION_IMPL *session, const char *cfg[]) WT_UNUSED(cfg); btree = S2BT(session); - WT_ASSERT(session, btree->checkpoint_gen == - S2C(session)->txn_global.checkpoint_gen); + WT_ASSERT(session, + btree->checkpoint_gen == __wt_gen(session, WT_GEN_CHECKPOINT)); btree->evict_walk_period = btree->evict_walk_saved; return (0); } -- cgit v1.2.1 From f071bba27f3d431677b8530dd0e7aab620161ce3 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Wed, 5 Apr 2017 10:52:17 -0400 Subject: WT-3123 Add thread pause functionality to thread group code. (#3286) Also have the thread group code manage the lifespan of threads more actively. Convert the eviction server thread group usage to use updated semantic. --- src/evict/evict_lru.c | 153 ++++++++++++++++++++-------------- src/include/extern.h | 9 +- src/include/thread_group.h | 23 ++++- src/support/thread_group.c | 203 +++++++++++++++++++++++---------------------- 4 files changed, 221 insertions(+), 167 deletions(-) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 47eb20acd6f..bd5ae261eaf 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -270,9 +270,20 @@ __wt_evict_server_wake(WT_SESSION_IMPL *session) __wt_cond_signal(session, cache->evict_cond); } +/* + * __wt_evict_thread_chk -- + * Check to decide if the eviction thread should continue running. + */ +bool +__wt_evict_thread_chk(WT_SESSION_IMPL *session) +{ + return (F_ISSET(S2C(session), WT_CONN_EVICTION_RUN)); +} + /* * __wt_evict_thread_run -- - * Starting point for an eviction thread. + * Entry function for an eviction thread. 
This is called repeatedly + * from the thread group code so it does not need to loop itself. */ int __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) @@ -285,73 +296,83 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) conn = S2C(session); cache = conn->cache; -#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE) /* - * Ensure the cache stuck timer is initialized when starting eviction. + * The thread group code calls us repeatedly. So each call is one pass + * through eviction. */ - if (thread->id == 0) - __wt_epoch(session, &cache->stuck_ts); -#endif - - while (F_ISSET(conn, WT_CONN_EVICTION_RUN) && - F_ISSET(thread, WT_THREAD_RUN)) { - if (conn->evict_server_running && - __wt_spin_trylock(session, &cache->evict_pass_lock) == 0) { - /* - * Cannot use WT_WITH_PASS_LOCK because this is a try - * lock. Fix when that is supported. We set the flag - * on both sessions because we may call clear_walk when - * we are walking with the walk session, locked. - */ - F_SET(session, WT_SESSION_LOCKED_PASS); - F_SET(cache->walk_session, WT_SESSION_LOCKED_PASS); - ret = __evict_server(session, &did_work); - F_CLR(cache->walk_session, WT_SESSION_LOCKED_PASS); - F_CLR(session, WT_SESSION_LOCKED_PASS); - was_intr = cache->pass_intr != 0; - __wt_spin_unlock(session, &cache->evict_pass_lock); - WT_ERR(ret); - - /* - * If the eviction server was interrupted, wait until - * requests have been processed: the system may - * otherwise be busy so don't go to sleep. - */ - if (was_intr) { - while (cache->pass_intr != 0 && - F_ISSET(conn, WT_CONN_EVICTION_RUN) && - F_ISSET(thread, WT_THREAD_RUN)) - __wt_yield(); - continue; - } + if (conn->evict_server_running && + __wt_spin_trylock(session, &cache->evict_pass_lock) == 0) { + /* + * Cannot use WT_WITH_PASS_LOCK because this is a try lock. + * Fix when that is supported. We set the flag on both sessions + * because we may call clear_walk when we are walking with + * the walk session, locked. 
+ */ + F_SET(session, WT_SESSION_LOCKED_PASS); + F_SET(cache->walk_session, WT_SESSION_LOCKED_PASS); + ret = __evict_server(session, &did_work); + F_CLR(cache->walk_session, WT_SESSION_LOCKED_PASS); + F_CLR(session, WT_SESSION_LOCKED_PASS); + was_intr = cache->pass_intr != 0; + __wt_spin_unlock(session, &cache->evict_pass_lock); + WT_ERR(ret); + /* + * If the eviction server was interrupted, wait until requests + * have been processed: the system may otherwise be busy so + * don't go to sleep. + */ + if (was_intr) + while (cache->pass_intr != 0 && + F_ISSET(conn, WT_CONN_EVICTION_RUN) && + F_ISSET(thread, WT_THREAD_RUN)) + __wt_yield(); + else { __wt_verbose(session, WT_VERB_EVICTSERVER, "sleeping"); /* Don't rely on signals: check periodically. */ - __wt_cond_auto_wait( - session, cache->evict_cond, did_work, NULL); + __wt_cond_auto_wait(session, + cache->evict_cond, did_work, NULL); __wt_verbose(session, WT_VERB_EVICTSERVER, "waking"); - } else - WT_ERR(__evict_lru_pages(session, false)); + } + } else + WT_ERR(__evict_lru_pages(session, false)); + + if (0) { +err: WT_PANIC_MSG(session, ret, "cache eviction thread error"); } + return (ret); +} +/* + * __wt_evict_thread_stop -- + * Shutdown function for an eviction thread. + */ +int +__wt_evict_thread_stop(WT_SESSION_IMPL *session, WT_THREAD *thread) +{ + WT_CACHE *cache; + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + + if (thread->id != 0) + return (0); + + conn = S2C(session); + cache = conn->cache; /* * The only time the first eviction thread is stopped is on shutdown: * in case any trees are still open, clear all walks now so that they * can be closed. */ - if (thread->id == 0) { - WT_WITH_PASS_LOCK(session, - ret = __evict_clear_all_walks(session)); - WT_ERR(ret); - /* - * The only two cases when the eviction server is expected to - * stop are when recovery is finished or when the connection is - * closing. 
- */ - WT_ASSERT(session, - F_ISSET(conn, WT_CONN_CLOSING | WT_CONN_RECOVERING)); - } + WT_WITH_PASS_LOCK(session, ret = __evict_clear_all_walks(session)); + WT_ERR(ret); + /* + * The only two cases when the eviction server is expected to + * stop are when recovery is finished or when the connection is + * closing. + */ + WT_ASSERT(session, F_ISSET(conn, WT_CONN_CLOSING | WT_CONN_RECOVERING)); __wt_verbose( session, WT_VERB_EVICTSERVER, "cache eviction thread exiting"); @@ -472,7 +493,15 @@ __wt_evict_create(WT_SESSION_IMPL *session) */ WT_RET(__wt_thread_group_create(session, &conn->evict_threads, "eviction-server", conn->evict_threads_min, conn->evict_threads_max, - WT_THREAD_CAN_WAIT | WT_THREAD_PANIC_FAIL, __wt_evict_thread_run)); + WT_THREAD_CAN_WAIT | WT_THREAD_PANIC_FAIL, __wt_evict_thread_chk, + __wt_evict_thread_run, __wt_evict_thread_stop)); + +#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE) + /* + * Ensure the cache stuck timer is initialized when starting eviction. + */ + __wt_epoch(session, &conn->cache->stuck_ts); +#endif /* * Allow queues to be populated now that the eviction threads @@ -970,8 +999,8 @@ __evict_tune_workers(WT_SESSION_IMPL *session) thread_surplus = conn->evict_threads.current_threads - conn->evict_threads_min; for (i = 0; i < thread_surplus; i++) { - WT_ERR(__wt_thread_group_stop_one( - session, &conn->evict_threads, false)); + __wt_thread_group_stop_one( + session, &conn->evict_threads); WT_STAT_CONN_INCR(session, cache_eviction_worker_removed); } @@ -1055,10 +1084,10 @@ __evict_tune_workers(WT_SESSION_IMPL *session) * were unable to acquire the thread group lock. * Break out of trying. 
*/ - WT_ERR(__wt_thread_group_stop_one( - session, &conn->evict_threads, false)); + __wt_thread_group_stop_one( + session, &conn->evict_threads); WT_STAT_CONN_INCR(session, - cache_eviction_worker_removed); + cache_eviction_worker_removed); } WT_STAT_CONN_SET(session, cache_eviction_stable_state_workers, @@ -1094,8 +1123,8 @@ __evict_tune_workers(WT_SESSION_IMPL *session) * unable to acquire the thread group lock. Break out * of trying. */ - WT_ERR(__wt_thread_group_start_one(session, - &conn->evict_threads, false)); + __wt_thread_group_start_one(session, + &conn->evict_threads, false); WT_STAT_CONN_INCR(session, cache_eviction_worker_created); __wt_verbose(session, WT_VERB_EVICTSERVER, diff --git a/src/include/extern.h b/src/include/extern.h index b91190a4442..0aed6c1bd1a 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -344,7 +344,9 @@ extern int __wt_curtable_open(WT_SESSION_IMPL *session, const char *uri, WT_CURS extern int __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_evict_server_wake(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern bool __wt_evict_thread_chk(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_evict_thread_stop(WT_SESSION_IMPL *session, WT_THREAD *thread) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_evict_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern 
int __wt_evict_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -733,12 +735,11 @@ extern void __wt_stat_join_init_single(WT_JOIN_STATS *stats) WT_GCC_FUNC_DECL_AT extern void __wt_stat_join_clear_single(WT_JOIN_STATS *stats) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_stat_join_clear_all(WT_JOIN_STATS **stats) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_stat_join_aggregate( WT_JOIN_STATS **from, WT_JOIN_STATS *to) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern WT_THREAD_RET __wt_thread_run(void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_thread_group_resize( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, uint32_t new_min, uint32_t new_max, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_thread_group_create( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, const char *name, uint32_t min, uint32_t max, uint32_t flags, int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_thread_group_create( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, const char *name, uint32_t min, uint32_t max, uint32_t flags, bool (*chk_func)(WT_SESSION_IMPL *session), int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context), int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_thread_group_start_one( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool wait) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_thread_group_stop_one( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool wait) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_thread_group_start_one( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool is_locked) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_thread_group_stop_one(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_txn_get_snapshot(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/thread_group.h b/src/include/thread_group.h index 77cff00dc8d..031af8ca5c3 100644 --- a/src/include/thread_group.h +++ b/src/include/thread_group.h @@ -6,6 +6,8 @@ * See the file LICENSE for redistribution information. */ +#define WT_THREAD_PAUSE 10 /* Thread pause timeout in seconds */ + /* * WT_THREAD -- * Encapsulation of a thread that belongs to a thread group. @@ -19,13 +21,24 @@ struct __wt_thread { * WT_THREAD and thread-group function flags, merged because * WT_THREAD_PANIC_FAIL appears in both groups. 
*/ -#define WT_THREAD_CAN_WAIT 0x01 /* WT_SESSION_CAN_WAIT */ -#define WT_THREAD_PANIC_FAIL 0x02 /* panic if the thread fails */ -#define WT_THREAD_RUN 0x04 /* thread is running */ +#define WT_THREAD_ACTIVE 0x01 /* thread is active or paused */ +#define WT_THREAD_CAN_WAIT 0x02 /* WT_SESSION_CAN_WAIT */ +#define WT_THREAD_PANIC_FAIL 0x04 /* panic if the thread fails */ +#define WT_THREAD_RUN 0x08 /* thread is running */ uint32_t flags; + /* + * Condition signalled when a thread becomes active. Paused + * threads wait on this condition. + */ + WT_CONDVAR *pause_cond; + + /* The check function used by all threads. */ + bool (*chk_func)(WT_SESSION_IMPL *session); /* The runner function used by all threads. */ int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context); + /* The stop function used by all threads. */ + int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context); }; /* @@ -57,6 +70,10 @@ struct __wt_thread_group { */ WT_THREAD **threads; + /* The check function used by all threads. */ + bool (*chk_func)(WT_SESSION_IMPL *session); /* The runner function used by all threads. */ int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context); + /* The stop function used by all threads. May be NULL */ + int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context); }; diff --git a/src/support/thread_group.c b/src/support/thread_group.c index 2b4b7ad4e61..422fe38a526 100644 --- a/src/support/thread_group.c +++ b/src/support/thread_group.c @@ -9,11 +9,11 @@ #include "wt_internal.h" /* - * __wt_thread_run -- + * __thread_run -- * General wrapper for any thread. 
*/ -WT_THREAD_RET -__wt_thread_run(void *arg) +static WT_THREAD_RET +__thread_run(void *arg) { WT_DECL_RET; WT_SESSION_IMPL *session; @@ -22,7 +22,20 @@ __wt_thread_run(void *arg) thread = (WT_THREAD*)arg; session = thread->session; - ret = thread->run_func(session, thread); + for (;;) { + if (!F_ISSET(thread, WT_THREAD_RUN)) + break; + if (!F_ISSET(thread, WT_THREAD_ACTIVE)) + __wt_cond_wait(session, thread->pause_cond, + WT_THREAD_PAUSE * WT_MILLION, thread->chk_func); + WT_ERR(thread->run_func(session, thread)); + } + + /* + * If a thread is stopping it may have subsystem cleanup to do. + */ +err: if (thread->stop_func != NULL) + ret = thread->stop_func(session, thread); if (ret != 0 && F_ISSET(thread, WT_THREAD_PANIC_FAIL)) WT_PANIC_MSG(session, ret, @@ -40,43 +53,14 @@ __wt_thread_run(void *arg) return (WT_THREAD_RET_VALUE); } -/* - * __thread_group_grow -- - * Increase the number of running threads in the group. - */ -static int -__thread_group_grow( - WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, uint32_t new_count) -{ - WT_THREAD *thread; - - WT_ASSERT(session, __wt_rwlock_islocked(session, &group->lock)); - - /* - * Any bounds checking is done by the caller so we know that - * there is space in the array for new threads. - */ - while (group->current_threads < new_count) { - thread = group->threads[group->current_threads++]; - __wt_verbose(session, WT_VERB_THREAD_GROUP, - "Starting utility thread: %p:%" PRIu32, - (void *)group, thread->id); - F_SET(thread, WT_THREAD_RUN); - WT_ASSERT(session, thread->session != NULL); - WT_RET(__wt_thread_create(thread->session, - &thread->tid, __wt_thread_run, thread)); - } - return (0); -} - /* * __thread_group_shrink -- - * Decrease the number of running threads in the group. Optionally free any - * memory associated with slots larger than the new count. + * Decrease the number of threads in the group and free memory + * associated with slots larger than the new count. 
*/ static int -__thread_group_shrink(WT_SESSION_IMPL *session, - WT_THREAD_GROUP *group, uint32_t new_count, bool free_thread) +__thread_group_shrink( + WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, uint32_t new_count) { WT_DECL_RET; WT_SESSION *wt_session; @@ -96,28 +80,28 @@ __thread_group_shrink(WT_SESSION_IMPL *session, continue; /* Wake threads to ensure they notice the state change */ - if (thread->tid != 0) { - __wt_verbose(session, WT_VERB_THREAD_GROUP, - "Stopping utility thread: %p:%" PRIu32, - (void *)group, thread->id); - F_CLR(thread, WT_THREAD_RUN); - __wt_cond_signal(session, group->wait_cond); - WT_TRET(__wt_thread_join(session, thread->tid)); - thread->tid = 0; - } - if (free_thread) { - if (thread->session != NULL) { - wt_session = (WT_SESSION *)thread->session; - WT_TRET(wt_session->close(wt_session, NULL)); - thread->session = NULL; - } - __wt_free(session, thread); - group->threads[current_slot] = NULL; - } + WT_ASSERT(session, thread->tid != 0); + __wt_verbose(session, WT_VERB_THREAD_GROUP, + "Stopping utility thread: %p:%" PRIu32, + (void *)group, thread->id); + if (F_ISSET(thread, WT_THREAD_ACTIVE)) + --group->current_threads; + F_CLR(thread, WT_THREAD_ACTIVE | WT_THREAD_RUN); + /* + * Signal the thread in case it is in a long timeout. 
+ */ + __wt_cond_signal(session, thread->pause_cond); + __wt_cond_signal(session, group->wait_cond); + WT_TRET(__wt_thread_join(session, thread->tid)); + WT_TRET(__wt_cond_destroy(session, &thread->pause_cond)); + WT_ASSERT(session, thread->session != NULL); + wt_session = (WT_SESSION *)thread->session; + WT_TRET(wt_session->close(wt_session, NULL)); + thread->session = NULL; + __wt_free(session, thread); + group->threads[current_slot] = NULL; } - /* Update the thread group state to match our changes */ - group->current_threads = current_slot; return (ret); } @@ -139,6 +123,11 @@ __thread_group_resize( conn = S2C(session); session_flags = 0; + __wt_verbose(session, WT_VERB_THREAD_GROUP, + "Resize thread group: %p, from min: %" PRIu32 " -> %" PRIu32 + " from max: %" PRIu32 " -> %" PRIu32, + (void *)group, group->min, new_min, group->max, new_max); + WT_ASSERT(session, group->current_threads <= group->alloc && __wt_rwlock_islocked(session, &group->lock)); @@ -153,7 +142,7 @@ __thread_group_resize( * Call shrink to reduce the number of thread structures and running * threads if required by the change in group size. */ - WT_RET(__thread_group_shrink(session, group, new_max, true)); + WT_RET(__thread_group_shrink(session, group, new_max)); /* * Only reallocate the thread array if it is the largest ever, since @@ -187,13 +176,26 @@ __thread_group_resize( if (LF_ISSET(WT_THREAD_PANIC_FAIL)) F_SET(thread, WT_THREAD_PANIC_FAIL); thread->id = i; + thread->chk_func = group->chk_func; thread->run_func = group->run_func; + thread->stop_func = group->stop_func; + WT_ERR(__wt_cond_alloc( + session, "Thread cond", &thread->pause_cond)); WT_ASSERT(session, group->threads[i] == NULL); group->threads[i] = thread; - } - if (group->current_threads < new_min) - WT_ERR(__thread_group_grow(session, group, new_min)); + /* + * Start thread as inactive. We'll activate the needed + * number later. 
+ */ + __wt_verbose(session, WT_VERB_THREAD_GROUP, + "Starting utility thread: %p:%" PRIu32, + (void *)group, thread->id); + F_SET(thread, WT_THREAD_RUN); + WT_ASSERT(session, thread->session != NULL); + WT_ERR(__wt_thread_create(thread->session, + &thread->tid, __thread_run, thread)); + } err: /* * Update the thread group information even on failure to improve our @@ -206,7 +208,10 @@ err: /* * An error resizing a thread array is fatal, it should only happen * in an out of memory situation. */ - if (ret != 0) { + if (ret == 0) + while (group->current_threads < new_min) + __wt_thread_group_start_one(session, group, true); + else { WT_TRET(__wt_thread_group_destroy(session, group)); WT_PANIC_RET(session, ret, "Error while resizing thread group"); } @@ -224,11 +229,6 @@ __wt_thread_group_resize( { WT_DECL_RET; - __wt_verbose(session, WT_VERB_THREAD_GROUP, - "Resize thread group: %p, from min: %" PRIu32 " -> %" PRIu32 - " from max: %" PRIu32 " -> %" PRIu32, - (void *)group, group->min, new_min, group->max, new_max); - __wt_writelock(session, &group->lock); WT_TRET(__thread_group_resize(session, group, new_min, new_max, flags)); __wt_writeunlock(session, &group->lock); @@ -244,7 +244,9 @@ int __wt_thread_group_create( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, const char *name, uint32_t min, uint32_t max, uint32_t flags, - int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context)) + bool (*chk_func)(WT_SESSION_IMPL *session), + int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context), + int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context)) { WT_DECL_RET; bool cond_alloced; @@ -263,7 +265,9 @@ __wt_thread_group_create( cond_alloced = true; __wt_writelock(session, &group->lock); + group->chk_func = chk_func; group->run_func = run_func; + group->stop_func = stop_func; group->name = name; WT_TRET(__thread_group_resize(session, group, min, max, flags)); @@ -293,7 +297,7 @@ __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) 
WT_ASSERT(session, __wt_rwlock_islocked(session, &group->lock)); /* Shut down all threads and free associated resources. */ - WT_TRET(__thread_group_shrink(session, group, 0, true)); + WT_TRET(__thread_group_shrink(session, group, 0)); __wt_free(session, group->threads); @@ -314,52 +318,55 @@ __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) * __wt_thread_group_start_one -- * Start a new thread if possible. */ -int +void __wt_thread_group_start_one( - WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool wait) + WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool is_locked) { - WT_DECL_RET; + WT_THREAD *thread; if (group->current_threads >= group->max) - return (0); + return; - if (wait) + if (!is_locked) __wt_writelock(session, &group->lock); - else - WT_RET(__wt_try_writelock(session, &group->lock)); /* Recheck the bounds now that we hold the lock */ - if (group->current_threads < group->max) - WT_TRET(__thread_group_grow( - session, group, group->current_threads + 1)); - __wt_writeunlock(session, &group->lock); - - return (ret); + if (group->current_threads < group->max) { + thread = group->threads[group->current_threads++]; + WT_ASSERT(session, thread != NULL); + __wt_verbose(session, WT_VERB_THREAD_GROUP, + "Activating utility thread: %p:%" PRIu32, + (void *)group, thread->id); + WT_ASSERT(session, !F_ISSET(thread, WT_THREAD_ACTIVE)); + F_SET(thread, WT_THREAD_ACTIVE); + __wt_cond_signal(session, thread->pause_cond); + } + if (!is_locked) + __wt_writeunlock(session, &group->lock); } /* * __wt_thread_group_stop_one -- - * Stop one thread if possible. + * Pause one thread if possible. 
*/ -int -__wt_thread_group_stop_one( - WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool wait) +void +__wt_thread_group_stop_one(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) { - WT_DECL_RET; + WT_THREAD *thread; if (group->current_threads <= group->min) - return (0); - - if (wait) - __wt_writelock(session, &group->lock); - else - WT_RET(__wt_try_writelock(session, &group->lock)); + return; + __wt_writelock(session, &group->lock); /* Recheck the bounds now that we hold the lock */ - if (group->current_threads > group->min) - WT_TRET(__thread_group_shrink( - session, group, group->current_threads - 1, false)); + if (group->current_threads > group->min) { + thread = group->threads[--group->current_threads]; + __wt_verbose(session, WT_VERB_THREAD_GROUP, + "Pausing utility thread: %p:%" PRIu32, + (void *)group, thread->id); + WT_ASSERT(session, F_ISSET(thread, WT_THREAD_ACTIVE)); + F_CLR(thread, WT_THREAD_ACTIVE); + __wt_cond_signal(session, thread->pause_cond); + } __wt_writeunlock(session, &group->lock); - - return (ret); } -- cgit v1.2.1 From 3be0fb108d0cc6e22b83ed59f8ea1108f05c4b43 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Wed, 5 Apr 2017 13:29:48 -0400 Subject: WT-3253 Update Python transaction tests to have correct flush configuration (#3368) --- test/suite/test_txn02.py | 5 ++++- test/suite/test_txn05.py | 5 ++++- test/suite/test_txn07.py | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/test/suite/test_txn02.py b/test/suite/test_txn02.py index 01626057b9e..0ab05baea36 100644 --- a/test/suite/test_txn02.py +++ b/test/suite/test_txn02.py @@ -137,7 +137,10 @@ class test_txn02(wttest.WiredTigerTestCase, suite_subprocess): self.check(self.session2, "isolation=read-uncommitted", current) # Opening a clone of the database home directory should run - # recovery and see the committed results. + # recovery and see the committed results. 
Flush the log because + # the backup may not get all the log records if we are running + # without a sync option. Use sync=off to force a write to the OS. + self.session.log_flush('sync=off') self.backup(self.backup_dir) backup_conn_params = 'log=(enabled,file_max=%s)' % self.logmax backup_conn = self.wiredtiger_open(self.backup_dir, backup_conn_params) diff --git a/test/suite/test_txn05.py b/test/suite/test_txn05.py index 7aaff221ba4..c1902404f8d 100644 --- a/test/suite/test_txn05.py +++ b/test/suite/test_txn05.py @@ -101,7 +101,10 @@ class test_txn05(wttest.WiredTigerTestCase, suite_subprocess): self.check(self.session2, "isolation=read-uncommitted", current) # Opening a clone of the database home directory should run - # recovery and see the committed results. + # recovery and see the committed results. Flush the log because + # the backup may not get all the log records if we are running + # without a sync option. Use sync=off to force a write to the OS. + self.session.log_flush('sync=off') self.backup(self.backup_dir) backup_conn_params = 'log=(enabled,file_max=%s)' % self.logmax backup_conn = self.wiredtiger_open(self.backup_dir, backup_conn_params) diff --git a/test/suite/test_txn07.py b/test/suite/test_txn07.py index e26cf5aaaea..ffb6ecfbecb 100644 --- a/test/suite/test_txn07.py +++ b/test/suite/test_txn07.py @@ -112,7 +112,10 @@ class test_txn07(wttest.WiredTigerTestCase, suite_subprocess): self.check(self.session2, "isolation=read-uncommitted", current) # Opening a clone of the database home directory should run - # recovery and see the committed results. + # recovery and see the committed results. Flush the log because + # the backup may not get all the log records if we are running + # without a sync option. Use sync=off to force a write to the OS. 
+ self.session.log_flush('sync=off') self.backup(self.backup_dir) backup_conn_params = 'log=(enabled,file_max=%s,' % self.logmax + \ 'compressor=%s)' % self.compress + \ -- cgit v1.2.1 From 455cd74ad9718ca1b1b64e1c28b23ca1717220a0 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Thu, 6 Apr 2017 11:52:38 +1000 Subject: WT-3265 Allow eviction of recently split pages when tree is locked. When pages split in WiredTiger, internal pages cannot be evicted immediately because there is a chance that a reader is still looking at an index pointing to the page. We check for this when considering pages for eviction, and assert that we never evict an internal page in an active generation. However, if a page splits and then we try to get exclusive access to the tree (e.g., to verify it), we could fail to evict the tree from cache even though we have guaranteed exclusive access to it. Relax the check on internal pages to allow eviction from trees that are locked exclusive. --- src/include/btree.i | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/include/btree.i b/src/include/btree.i index dcc73f51f1b..c9aee73ced6 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1354,8 +1354,13 @@ __wt_page_can_evict( * the original parent page's index, because evicting an internal page * discards its WT_REF array, and a thread traversing the original * parent page index might see a freed WT_REF. + * + * One special case where we know this is safe is if the handle is + * locked exclusive (e.g., when the whole tree is being evicted). In + * that case, no readers can be looking at an old index. 
*/ - if (WT_PAGE_IS_INTERNAL(page) && + if (!F_ISSET(session->dhandle, WT_DHANDLE_EXCLUSIVE) && + WT_PAGE_IS_INTERNAL(page) && page->pg_intl_split_gen >= __wt_gen_oldest(session, WT_GEN_SPLIT)) return (false); -- cgit v1.2.1 From b08187d2e034af66b48203682ecd3737f64545a2 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Thu, 6 Apr 2017 11:58:40 +1000 Subject: Revert "WT-3265 Allow eviction of recently split pages when tree is locked." This reverts commit 455cd74ad9718ca1b1b64e1c28b23ca1717220a0. --- src/include/btree.i | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/include/btree.i b/src/include/btree.i index c9aee73ced6..dcc73f51f1b 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1354,13 +1354,8 @@ __wt_page_can_evict( * the original parent page's index, because evicting an internal page * discards its WT_REF array, and a thread traversing the original * parent page index might see a freed WT_REF. - * - * One special case where we know this is safe is if the handle is - * locked exclusive (e.g., when the whole tree is being evicted). In - * that case, no readers can be looking at an old index. */ - if (!F_ISSET(session->dhandle, WT_DHANDLE_EXCLUSIVE) && - WT_PAGE_IS_INTERNAL(page) && + if (WT_PAGE_IS_INTERNAL(page) && page->pg_intl_split_gen >= __wt_gen_oldest(session, WT_GEN_SPLIT)) return (false); -- cgit v1.2.1 From c7b4322b8199a5c0d83f9b44018beb7a35edef09 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Fri, 7 Apr 2017 00:17:40 -0400 Subject: WT-3266 Drop lock while joining threads. 
(#3375) --- src/support/thread_group.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/support/thread_group.c b/src/support/thread_group.c index 422fe38a526..38e3049c264 100644 --- a/src/support/thread_group.c +++ b/src/support/thread_group.c @@ -79,7 +79,6 @@ __thread_group_shrink( if (thread == NULL) continue; - /* Wake threads to ensure they notice the state change */ WT_ASSERT(session, thread->tid != 0); __wt_verbose(session, WT_VERB_THREAD_GROUP, "Stopping utility thread: %p:%" PRIu32, @@ -92,8 +91,27 @@ __thread_group_shrink( */ __wt_cond_signal(session, thread->pause_cond); __wt_cond_signal(session, group->wait_cond); + } + + /* + * We have to perform the join without holding the lock because + * the threads themselves may be waiting on the lock. + */ + __wt_writeunlock(session, &group->lock); + for (current_slot = group->alloc; current_slot > new_count; ) { + thread = group->threads[--current_slot]; + + if (thread == NULL) + continue; WT_TRET(__wt_thread_join(session, thread->tid)); WT_TRET(__wt_cond_destroy(session, &thread->pause_cond)); + } + __wt_writelock(session, &group->lock); + for (current_slot = group->alloc; current_slot > new_count; ) { + thread = group->threads[--current_slot]; + + if (thread == NULL) + continue; WT_ASSERT(session, thread->session != NULL); wt_session = (WT_SESSION *)thread->session; WT_TRET(wt_session->close(wt_session, NULL)); -- cgit v1.2.1 From 1df0d26ef1b81fe40fca840cee03970694bded3e Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 7 Apr 2017 00:43:02 -0400 Subject: WT-3267 Upgrade copyright notices from 2016 to 2017. 
(#3374) --- LICENSE | 2 +- api/leveldb/leveldb_wt.h | 2 +- bench/wtperf/config.c | 2 +- bench/wtperf/config_opt.h | 2 +- bench/wtperf/idle_table_cycle.c | 2 +- bench/wtperf/misc.c | 2 +- bench/wtperf/runners/get_ckpt.py | 2 +- bench/wtperf/track.c | 2 +- bench/wtperf/wtperf.c | 2 +- bench/wtperf/wtperf.h | 2 +- bench/wtperf/wtperf_opt.i | 2 +- bench/wtperf/wtperf_throttle.c | 2 +- bench/wtperf/wtperf_truncate.c | 2 +- dist/s_copyright | 2 ++ dist/s_copyright.list | 2 ++ examples/c/ex_access.c | 2 +- examples/c/ex_all.c | 2 +- examples/c/ex_async.c | 2 +- examples/c/ex_backup.c | 2 +- examples/c/ex_call_center.c | 2 +- examples/c/ex_config_parse.c | 2 +- examples/c/ex_cursor.c | 2 +- examples/c/ex_data_source.c | 2 +- examples/c/ex_encrypt.c | 2 +- examples/c/ex_event_handler.c | 2 +- examples/c/ex_extending.c | 2 +- examples/c/ex_extractor.c | 2 +- examples/c/ex_file_system.c | 2 +- examples/c/ex_hello.c | 2 +- examples/c/ex_log.c | 2 +- examples/c/ex_pack.c | 2 +- examples/c/ex_process.c | 2 +- examples/c/ex_schema.c | 2 +- examples/c/ex_stat.c | 2 +- examples/c/ex_sync.c | 4 ++-- examples/c/ex_thread.c | 2 +- .../java/com/wiredtiger/examples/ex_access.java | 2 +- examples/java/com/wiredtiger/examples/ex_all.java | 2 +- .../java/com/wiredtiger/examples/ex_async.java | 2 +- .../com/wiredtiger/examples/ex_call_center.java | 2 +- .../java/com/wiredtiger/examples/ex_cursor.java | 2 +- examples/java/com/wiredtiger/examples/ex_log.java | 2 +- .../java/com/wiredtiger/examples/ex_schema.java | 2 +- examples/java/com/wiredtiger/examples/ex_stat.java | 2 +- .../java/com/wiredtiger/examples/ex_thread.java | 2 +- examples/python/ex_access.py | 2 +- examples/python/ex_stat.py | 2 +- ext/collators/reverse/reverse_collator.c | 2 +- ext/collators/revint/revint_collator.c | 2 +- ext/compressors/lz4/lz4_compress.c | 2 +- ext/compressors/nop/nop_compress.c | 2 +- ext/compressors/snappy/snappy_compress.c | 2 +- ext/compressors/zlib/zlib_compress.c | 2 +- 
ext/compressors/zstd/zstd_compress.c | 2 +- ext/datasources/helium/helium.c | 2 +- ext/encryptors/nop/nop_encrypt.c | 2 +- ext/encryptors/rotn/rotn_encrypt.c | 2 +- ext/extractors/csv/csv_extractor.c | 2 +- ext/test/fail_fs/fail_fs.c | 2 +- ext/test/kvs_bdb/kvs_bdb.c | 2 +- lang/java/src/com/wiredtiger/db/AsyncCallback.java | 2 +- .../com/wiredtiger/db/PackFormatInputStream.java | 2 +- .../src/com/wiredtiger/db/PackInputStream.java | 2 +- .../src/com/wiredtiger/db/PackOutputStream.java | 2 +- lang/java/src/com/wiredtiger/db/PackUtil.java | 2 +- .../src/com/wiredtiger/db/WiredTigerException.java | 2 +- .../wiredtiger/db/WiredTigerPackingException.java | 2 +- .../wiredtiger/db/WiredTigerPanicException.java | 2 +- .../wiredtiger/db/WiredTigerRollbackException.java | 2 +- lang/java/wiredtiger.i | 2 +- lang/python/setup.py | 2 +- lang/python/setup_pip.py | 2 +- lang/python/wiredtiger.i | 2 +- lang/python/wiredtiger/fpacking.py | 2 +- lang/python/wiredtiger/intpacking.py | 2 +- lang/python/wiredtiger/packing.py | 2 +- lang/python/wiredtiger/pip_init.py | 2 +- src/async/async_api.c | 2 +- src/async/async_op.c | 2 +- src/async/async_worker.c | 2 +- src/block/block_addr.c | 2 +- src/block/block_ckpt.c | 2 +- src/block/block_compact.c | 2 +- src/block/block_ext.c | 2 +- src/block/block_map.c | 2 +- src/block/block_mgr.c | 2 +- src/block/block_open.c | 2 +- src/block/block_read.c | 2 +- src/block/block_session.c | 2 +- src/block/block_slvg.c | 2 +- src/block/block_vrfy.c | 2 +- src/block/block_write.c | 2 +- src/bloom/bloom.c | 2 +- src/btree/bt_compact.c | 2 +- src/btree/bt_curnext.c | 2 +- src/btree/bt_curprev.c | 2 +- src/btree/bt_cursor.c | 2 +- src/btree/bt_debug.c | 2 +- src/btree/bt_delete.c | 2 +- src/btree/bt_discard.c | 2 +- src/btree/bt_handle.c | 2 +- src/btree/bt_huffman.c | 2 +- src/btree/bt_io.c | 2 +- src/btree/bt_misc.c | 2 +- src/btree/bt_ovfl.c | 2 +- src/btree/bt_page.c | 2 +- src/btree/bt_random.c | 2 +- src/btree/bt_read.c | 2 +- src/btree/bt_rebalance.c 
| 2 +- src/btree/bt_ret.c | 2 +- src/btree/bt_slvg.c | 2 +- src/btree/bt_split.c | 2 +- src/btree/bt_stat.c | 2 +- src/btree/bt_sync.c | 2 +- src/btree/bt_upgrade.c | 2 +- src/btree/bt_vrfy.c | 2 +- src/btree/bt_vrfy_dsk.c | 2 +- src/btree/bt_walk.c | 2 +- src/btree/col_modify.c | 2 +- src/btree/col_srch.c | 2 +- src/btree/row_key.c | 2 +- src/btree/row_modify.c | 2 +- src/btree/row_srch.c | 2 +- src/cache/cache_las.c | 2 +- src/checksum/arm64/crc32-arm64.c | 2 +- src/checksum/software/checksum.c | 2 +- src/checksum/x86/crc32-x86.c | 2 +- src/config/config.c | 2 +- src/config/config_api.c | 2 +- src/config/config_check.c | 2 +- src/config/config_collapse.c | 2 +- src/config/config_ext.c | 2 +- src/config/config_upgrade.c | 2 +- src/conn/api_version.c | 2 +- src/conn/conn_api.c | 2 +- src/conn/conn_cache.c | 2 +- src/conn/conn_cache_pool.c | 2 +- src/conn/conn_ckpt.c | 2 +- src/conn/conn_dhandle.c | 2 +- src/conn/conn_handle.c | 2 +- src/conn/conn_log.c | 2 +- src/conn/conn_open.c | 2 +- src/conn/conn_stat.c | 2 +- src/conn/conn_sweep.c | 2 +- src/cursor/cur_backup.c | 2 +- src/cursor/cur_bulk.c | 2 +- src/cursor/cur_config.c | 2 +- src/cursor/cur_ds.c | 2 +- src/cursor/cur_dump.c | 2 +- src/cursor/cur_file.c | 2 +- src/cursor/cur_index.c | 2 +- src/cursor/cur_join.c | 2 +- src/cursor/cur_json.c | 2 +- src/cursor/cur_log.c | 2 +- src/cursor/cur_metadata.c | 2 +- src/cursor/cur_stat.c | 2 +- src/cursor/cur_std.c | 2 +- src/cursor/cur_table.c | 2 +- src/docs/build-javadoc.sh | 2 +- src/docs/style/footer.html | 4 ++-- src/docs/tools/doxfilter.py | 2 +- src/docs/tools/fixlinks.py | 2 +- src/evict/evict_file.c | 2 +- src/evict/evict_lru.c | 2 +- src/evict/evict_page.c | 2 +- src/evict/evict_stat.c | 2 +- src/include/api.h | 2 +- src/include/async.h | 2 +- src/include/bitstring.i | 2 +- src/include/block.h | 2 +- src/include/bloom.h | 2 +- src/include/btmem.h | 2 +- src/include/btree.h | 2 +- src/include/btree.i | 2 +- src/include/btree_cmp.i | 2 +- src/include/buf.i | 2 
+- src/include/cache.h | 2 +- src/include/cache.i | 2 +- src/include/cell.i | 2 +- src/include/column.i | 2 +- src/include/compact.h | 2 +- src/include/config.h | 2 +- src/include/connection.h | 2 +- src/include/ctype.i | 2 +- src/include/cursor.h | 2 +- src/include/cursor.i | 2 +- src/include/dhandle.h | 2 +- src/include/dlh.h | 2 +- src/include/error.h | 2 +- src/include/gcc.h | 2 +- src/include/hardware.h | 2 +- src/include/intpack.i | 2 +- src/include/lint.h | 2 +- src/include/log.h | 2 +- src/include/log.i | 2 +- src/include/lsm.h | 2 +- src/include/meta.h | 2 +- src/include/misc.h | 2 +- src/include/misc.i | 2 +- src/include/msvc.h | 2 +- src/include/mutex.h | 2 +- src/include/mutex.i | 2 +- src/include/os.h | 2 +- src/include/os_fhandle.i | 2 +- src/include/os_fs.i | 2 +- src/include/os_fstream.i | 2 +- src/include/os_windows.h | 2 +- src/include/packing.i | 2 +- src/include/posix.h | 2 +- src/include/schema.h | 2 +- src/include/serial.i | 2 +- src/include/session.h | 2 +- src/include/stat.h | 2 +- src/include/swap.h | 2 +- src/include/thread_group.h | 2 +- src/include/txn.h | 2 +- src/include/txn.i | 2 +- src/include/verify_build.h | 2 +- src/include/wiredtiger.in | 2 +- src/include/wiredtiger_ext.h | 2 +- src/include/wt_internal.h | 2 +- src/log/log.c | 2 +- src/log/log_slot.c | 2 +- src/lsm/lsm_cursor.c | 2 +- src/lsm/lsm_cursor_bulk.c | 2 +- src/lsm/lsm_manager.c | 2 +- src/lsm/lsm_merge.c | 2 +- src/lsm/lsm_meta.c | 2 +- src/lsm/lsm_stat.c | 2 +- src/lsm/lsm_tree.c | 2 +- src/lsm/lsm_work_unit.c | 2 +- src/lsm/lsm_worker.c | 2 +- src/meta/meta_apply.c | 2 +- src/meta/meta_ckpt.c | 2 +- src/meta/meta_ext.c | 2 +- src/meta/meta_table.c | 2 +- src/meta/meta_track.c | 2 +- src/meta/meta_turtle.c | 2 +- src/os_common/filename.c | 2 +- src/os_common/os_abort.c | 2 +- src/os_common/os_alloc.c | 2 +- src/os_common/os_errno.c | 2 +- src/os_common/os_fhandle.c | 2 +- src/os_common/os_fs_inmemory.c | 2 +- src/os_common/os_fstream.c | 2 +- 
src/os_common/os_fstream_stdio.c | 2 +- src/os_common/os_getopt.c | 2 +- src/os_common/os_strtouq.c | 2 +- src/os_posix/os_dir.c | 2 +- src/os_posix/os_dlopen.c | 2 +- src/os_posix/os_fallocate.c | 2 +- src/os_posix/os_fs.c | 2 +- src/os_posix/os_getenv.c | 2 +- src/os_posix/os_map.c | 2 +- src/os_posix/os_mtx_cond.c | 2 +- src/os_posix/os_once.c | 2 +- src/os_posix/os_pagesize.c | 2 +- src/os_posix/os_path.c | 2 +- src/os_posix/os_priv.c | 2 +- src/os_posix/os_setvbuf.c | 2 +- src/os_posix/os_sleep.c | 2 +- src/os_posix/os_snprintf.c | 2 +- src/os_posix/os_thread.c | 2 +- src/os_posix/os_time.c | 2 +- src/os_posix/os_yield.c | 2 +- src/os_win/os_dir.c | 2 +- src/os_win/os_dlopen.c | 2 +- src/os_win/os_fs.c | 2 +- src/os_win/os_getenv.c | 2 +- src/os_win/os_map.c | 2 +- src/os_win/os_mtx_cond.c | 2 +- src/os_win/os_once.c | 2 +- src/os_win/os_pagesize.c | 2 +- src/os_win/os_path.c | 2 +- src/os_win/os_priv.c | 2 +- src/os_win/os_setvbuf.c | 2 +- src/os_win/os_sleep.c | 2 +- src/os_win/os_snprintf.c | 2 +- src/os_win/os_thread.c | 2 +- src/os_win/os_time.c | 2 +- src/os_win/os_utf8.c | 4 ++-- src/os_win/os_winerr.c | 2 +- src/os_win/os_yield.c | 2 +- src/packing/pack_api.c | 2 +- src/packing/pack_impl.c | 2 +- src/packing/pack_stream.c | 2 +- src/reconcile/rec_track.c | 2 +- src/reconcile/rec_write.c | 2 +- src/schema/schema_alter.c | 2 +- src/schema/schema_create.c | 2 +- src/schema/schema_drop.c | 2 +- src/schema/schema_list.c | 2 +- src/schema/schema_open.c | 2 +- src/schema/schema_plan.c | 2 +- src/schema/schema_project.c | 2 +- src/schema/schema_rename.c | 2 +- src/schema/schema_stat.c | 2 +- src/schema/schema_truncate.c | 2 +- src/schema/schema_util.c | 2 +- src/schema/schema_worker.c | 2 +- src/session/session_api.c | 2 +- src/session/session_compact.c | 2 +- src/session/session_dhandle.c | 2 +- src/session/session_salvage.c | 2 +- src/support/cond_auto.c | 2 +- src/support/crypto.c | 2 +- src/support/err.c | 2 +- src/support/generation.c | 2 +- 
src/support/global.c | 2 +- src/support/hash_city.c | 2 +- src/support/hash_fnv.c | 2 +- src/support/hazard.c | 2 +- src/support/hex.c | 2 +- src/support/huffman.c | 4 ++-- src/support/mtx_rw.c | 2 +- src/support/pow.c | 2 +- src/support/rand.c | 2 +- src/support/scratch.c | 2 +- src/support/thread_group.c | 2 +- src/txn/txn.c | 2 +- src/txn/txn_ckpt.c | 2 +- src/txn/txn_ext.c | 2 +- src/txn/txn_log.c | 2 +- src/txn/txn_nsnap.c | 2 +- src/txn/txn_recover.c | 2 +- src/utilities/util.h | 2 +- src/utilities/util_alter.c | 2 +- src/utilities/util_backup.c | 2 +- src/utilities/util_compact.c | 2 +- src/utilities/util_cpyright.c | 4 ++-- src/utilities/util_create.c | 2 +- src/utilities/util_drop.c | 2 +- src/utilities/util_dump.c | 2 +- src/utilities/util_dump.h | 2 +- src/utilities/util_list.c | 2 +- src/utilities/util_load.c | 2 +- src/utilities/util_load.h | 2 +- src/utilities/util_load_json.c | 2 +- src/utilities/util_loadtext.c | 2 +- src/utilities/util_main.c | 2 +- src/utilities/util_misc.c | 2 +- src/utilities/util_printlog.c | 2 +- src/utilities/util_read.c | 2 +- src/utilities/util_rebalance.c | 2 +- src/utilities/util_rename.c | 2 +- src/utilities/util_salvage.c | 2 +- src/utilities/util_stat.c | 2 +- src/utilities/util_truncate.c | 2 +- src/utilities/util_upgrade.c | 2 +- src/utilities/util_verbose.c | 2 +- src/utilities/util_verify.c | 2 +- src/utilities/util_write.c | 2 +- test/bloom/test_bloom.c | 2 +- test/checkpoint/checkpointer.c | 2 +- test/checkpoint/test_checkpoint.c | 2 +- test/checkpoint/test_checkpoint.h | 2 +- test/checkpoint/workers.c | 2 +- test/csuite/scope/main.c | 2 +- test/csuite/wt1965_col_efficiency/main.c | 2 +- test/csuite/wt2246_col_append/main.c | 2 +- test/csuite/wt2323_join_visibility/main.c | 2 +- test/csuite/wt2403_lsm_workload/main.c | 2 +- test/csuite/wt2447_join_main_table/main.c | 2 +- test/csuite/wt2535_insert_race/main.c | 2 +- test/csuite/wt2592_join_schema/main.c | 2 +- test/csuite/wt2695_checksum/main.c | 2 +- 
test/csuite/wt2719_reconfig/main.c | 2 +- test/csuite/wt2834_join_bloom_fix/main.c | 2 +- test/csuite/wt2853_perf/main.c | 2 +- test/csuite/wt2909_checkpoint_integrity/main.c | 2 +- test/csuite/wt2999_join_extractor/main.c | 2 +- test/csuite/wt3135_search_near_collator/main.c | 2 +- test/csuite/wt3184_dup_index_collator/main.c | 2 +- test/cursor_order/cursor_order.c | 2 +- test/cursor_order/cursor_order.h | 2 +- test/cursor_order/cursor_order_file.c | 2 +- test/cursor_order/cursor_order_ops.c | 2 +- test/fops/file.c | 2 +- test/fops/fops.c | 2 +- test/fops/t.c | 2 +- test/fops/thread.h | 2 +- test/format/backup.c | 2 +- test/format/bdb.c | 2 +- test/format/bulk.c | 2 +- test/format/compact.c | 2 +- test/format/config.c | 2 +- test/format/config.h | 2 +- test/format/format.h | 2 +- test/format/lrt.c | 2 +- test/format/ops.c | 2 +- test/format/rebalance.c | 2 +- test/format/salvage.c | 2 +- test/format/t.c | 2 +- test/format/util.c | 2 +- test/format/wts.c | 2 +- test/huge/huge.c | 2 +- test/java/com/wiredtiger/test/AsyncTest.java | 2 +- test/java/com/wiredtiger/test/AutoCloseTest.java | 2 +- .../java/com/wiredtiger/test/BackupCursorTest.java | 2 +- .../com/wiredtiger/test/ConcurrentCloseTest.java | 2 +- test/java/com/wiredtiger/test/ConfigTest.java | 2 +- test/java/com/wiredtiger/test/CursorTest.java | 2 +- test/java/com/wiredtiger/test/CursorTest02.java | 2 +- test/java/com/wiredtiger/test/CursorTest03.java | 2 +- test/java/com/wiredtiger/test/ExceptionTest.java | 2 +- test/java/com/wiredtiger/test/PackTest.java | 2 +- test/java/com/wiredtiger/test/PackTest02.java | 2 +- test/java/com/wiredtiger/test/PackTest03.java | 2 +- test/java/com/wiredtiger/test/WiredTigerSuite.java | 2 +- test/manydbs/manydbs.c | 2 +- test/packing/intpack-test.c | 2 +- test/packing/intpack-test2.c | 2 +- test/packing/intpack-test3.c | 2 +- test/packing/packing-test.c | 2 +- test/readonly/readonly.c | 2 +- test/recovery/random-abort.c | 2 +- test/recovery/truncated-log.c | 2 +- 
test/salvage/salvage.c | 2 +- test/suite/helper.py | 2 +- test/suite/run.py | 2 +- test/suite/suite_random.py | 2 +- test/suite/suite_subprocess.py | 2 +- test/suite/test_alter01.py | 2 +- test/suite/test_async01.py | 2 +- test/suite/test_async02.py | 2 +- test/suite/test_async03.py | 2 +- test/suite/test_autoclose.py | 2 +- test/suite/test_backup01.py | 2 +- test/suite/test_backup02.py | 2 +- test/suite/test_backup03.py | 2 +- test/suite/test_backup04.py | 2 +- test/suite/test_backup05.py | 2 +- test/suite/test_backup06.py | 4 ++-- test/suite/test_base01.py | 2 +- test/suite/test_base02.py | 2 +- test/suite/test_base03.py | 2 +- test/suite/test_base04.py | 2 +- test/suite/test_base05.py | 2 +- test/suite/test_baseconfig.py | 2 +- test/suite/test_bug001.py | 2 +- test/suite/test_bug003.py | 2 +- test/suite/test_bug004.py | 2 +- test/suite/test_bug005.py | 2 +- test/suite/test_bug006.py | 2 +- test/suite/test_bug007.py | 2 +- test/suite/test_bug008.py | 2 +- test/suite/test_bug009.py | 2 +- test/suite/test_bug010.py | 2 +- test/suite/test_bug011.py | 4 ++-- test/suite/test_bug012.py | 4 ++-- test/suite/test_bug013.py | 2 +- test/suite/test_bug014.py | 2 +- test/suite/test_bug015.py | 2 +- test/suite/test_bug016.py | 2 +- test/suite/test_bug017.py | 2 +- test/suite/test_bulk01.py | 2 +- test/suite/test_bulk02.py | 2 +- test/suite/test_checkpoint01.py | 2 +- test/suite/test_checkpoint02.py | 2 +- test/suite/test_colgap.py | 4 ++-- test/suite/test_collator.py | 2 +- test/suite/test_compact01.py | 2 +- test/suite/test_compact02.py | 2 +- test/suite/test_compress01.py | 2 +- test/suite/test_config01.py | 2 +- test/suite/test_config02.py | 2 +- test/suite/test_config03.py | 4 ++-- test/suite/test_config04.py | 2 +- test/suite/test_config05.py | 2 +- test/suite/test_config06.py | 2 +- test/suite/test_cursor01.py | 2 +- test/suite/test_cursor02.py | 2 +- test/suite/test_cursor03.py | 2 +- test/suite/test_cursor04.py | 2 +- test/suite/test_cursor05.py | 2 +- 
test/suite/test_cursor06.py | 2 +- test/suite/test_cursor07.py | 2 +- test/suite/test_cursor08.py | 2 +- test/suite/test_cursor09.py | 2 +- test/suite/test_cursor10.py | 2 +- test/suite/test_cursor11.py | 2 +- test/suite/test_cursor_compare.py | 2 +- test/suite/test_cursor_pin.py | 4 ++-- test/suite/test_cursor_random.py | 2 +- test/suite/test_cursor_random02.py | 2 +- test/suite/test_cursor_tracker.py | 2 +- test/suite/test_drop.py | 2 +- test/suite/test_drop02.py | 2 +- test/suite/test_drop_create.py | 2 +- test/suite/test_dump.py | 2 +- test/suite/test_dupc.py | 2 +- test/suite/test_durability01.py | 2 +- test/suite/test_empty.py | 2 +- test/suite/test_encrypt01.py | 2 +- test/suite/test_encrypt02.py | 2 +- test/suite/test_encrypt03.py | 2 +- test/suite/test_encrypt04.py | 2 +- test/suite/test_encrypt05.py | 2 +- test/suite/test_encrypt06.py | 2 +- test/suite/test_encrypt07.py | 2 +- test/suite/test_env01.py | 2 +- test/suite/test_excl.py | 2 +- test/suite/test_hazard.py | 2 +- test/suite/test_home.py | 2 +- test/suite/test_huffman01.py | 2 +- test/suite/test_huffman02.py | 2 +- test/suite/test_index01.py | 2 +- test/suite/test_index02.py | 2 +- test/suite/test_inmem01.py | 2 +- test/suite/test_inmem02.py | 2 +- test/suite/test_intpack.py | 2 +- test/suite/test_join01.py | 2 +- test/suite/test_join02.py | 2 +- test/suite/test_join03.py | 2 +- test/suite/test_join04.py | 2 +- test/suite/test_join05.py | 2 +- test/suite/test_join06.py | 2 +- test/suite/test_join07.py | 2 +- test/suite/test_join08.py | 2 +- test/suite/test_join09.py | 2 +- test/suite/test_jsondump01.py | 2 +- test/suite/test_jsondump02.py | 2 +- test/suite/test_lsm01.py | 2 +- test/suite/test_lsm02.py | 2 +- test/suite/test_lsm03.py | 2 +- test/suite/test_metadata_cursor01.py | 2 +- test/suite/test_nsnap01.py | 2 +- test/suite/test_nsnap02.py | 2 +- test/suite/test_nsnap03.py | 2 +- test/suite/test_nsnap04.py | 2 +- test/suite/test_overwrite.py | 2 +- test/suite/test_pack.py | 2 +- 
test/suite/test_perf001.py | 2 +- test/suite/test_readonly01.py | 4 ++-- test/suite/test_readonly02.py | 4 ++-- test/suite/test_readonly03.py | 4 ++-- test/suite/test_rebalance.py | 2 +- test/suite/test_reconfig01.py | 2 +- test/suite/test_reconfig02.py | 2 +- test/suite/test_reconfig03.py | 2 +- test/suite/test_reconfig04.py | 2 +- test/suite/test_rename.py | 2 +- test/suite/test_salvage.py | 2 +- test/suite/test_schema01.py | 2 +- test/suite/test_schema02.py | 2 +- test/suite/test_schema03.py | 2 +- test/suite/test_schema04.py | 2 +- test/suite/test_schema05.py | 2 +- test/suite/test_schema06.py | 2 +- test/suite/test_schema07.py | 2 +- test/suite/test_shared_cache01.py | 2 +- test/suite/test_shared_cache02.py | 2 +- test/suite/test_split.py | 2 +- test/suite/test_stat01.py | 2 +- test/suite/test_stat02.py | 2 +- test/suite/test_stat03.py | 2 +- test/suite/test_stat04.py | 2 +- test/suite/test_stat05.py | 2 +- test/suite/test_stat_log01.py | 2 +- test/suite/test_sweep01.py | 2 +- test/suite/test_sweep02.py | 2 +- test/suite/test_sweep03.py | 2 +- test/suite/test_truncate01.py | 2 +- test/suite/test_truncate02.py | 2 +- test/suite/test_truncate03.py | 2 +- test/suite/test_txn01.py | 2 +- test/suite/test_txn02.py | 2 +- test/suite/test_txn03.py | 2 +- test/suite/test_txn04.py | 2 +- test/suite/test_txn05.py | 2 +- test/suite/test_txn06.py | 2 +- test/suite/test_txn07.py | 2 +- test/suite/test_txn08.py | 2 +- test/suite/test_txn09.py | 2 +- test/suite/test_txn10.py | 2 +- test/suite/test_txn11.py | 2 +- test/suite/test_txn12.py | 2 +- test/suite/test_txn13.py | 2 +- test/suite/test_txn14.py | 2 +- test/suite/test_txn15.py | 2 +- test/suite/test_unicode01.py | 2 +- test/suite/test_upgrade.py | 2 +- test/suite/test_util01.py | 2 +- test/suite/test_util02.py | 2 +- test/suite/test_util03.py | 2 +- test/suite/test_util04.py | 2 +- test/suite/test_util07.py | 2 +- test/suite/test_util08.py | 2 +- test/suite/test_util09.py | 2 +- test/suite/test_util11.py | 2 +- 
test/suite/test_util12.py | 2 +- test/suite/test_util13.py | 2 +- test/suite/test_verify.py | 2 +- test/suite/test_version.py | 2 +- test/suite/wtdataset.py | 2 +- test/suite/wtscenario.py | 2 +- test/suite/wttest.py | 2 +- test/suite/wtthread.py | 2 +- test/syscall/syscall.py | 2 +- test/syscall/wt2336_base/main.c | 28 ++++++++++++++++++++++ test/thread/file.c | 2 +- test/thread/rw.c | 2 +- test/thread/stats.c | 2 +- test/thread/t.c | 2 +- test/thread/thread.h | 2 +- test/utility/misc.c | 2 +- test/utility/parse_opts.c | 2 +- test/utility/test_util.h | 2 +- test/utility/thread.c | 2 +- test/windows/windows_shim.c | 2 +- test/windows/windows_shim.h | 2 +- test/wtperf/test_conf_dump.py | 28 ++++++++++++++++++++++ tools/wt_ckpt_decode.py | 2 +- tools/wtstats/test/test_wtstats.py | 2 +- tools/wtstats/wtstats.py | 2 +- 615 files changed, 685 insertions(+), 625 deletions(-) diff --git a/LICENSE b/LICENSE index a0f40657511..8abd469a7b1 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2014-2016 MongoDB, Inc. +Copyright (c) 2014-2017 MongoDB, Inc. Copyright (c) 2008-2014 WiredTiger, Inc. All rights reserved. diff --git a/api/leveldb/leveldb_wt.h b/api/leveldb/leveldb_wt.h index 351eb9f3dda..b167e03192e 100644 --- a/api/leveldb/leveldb_wt.h +++ b/api/leveldb/leveldb_wt.h @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/bench/wtperf/config.c b/bench/wtperf/config.c index e4eee66e4cb..c5a3dd40032 100644 --- a/bench/wtperf/config.c +++ b/bench/wtperf/config.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/bench/wtperf/config_opt.h b/bench/wtperf/config_opt.h index 3f1ab642227..68bcd3e45f1 100644 --- a/bench/wtperf/config_opt.h +++ b/bench/wtperf/config_opt.h @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/bench/wtperf/idle_table_cycle.c b/bench/wtperf/idle_table_cycle.c index 4387860cfb2..ce64049ce89 100644 --- a/bench/wtperf/idle_table_cycle.c +++ b/bench/wtperf/idle_table_cycle.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/bench/wtperf/misc.c b/bench/wtperf/misc.c index 0874794e01e..da48c600589 100644 --- a/bench/wtperf/misc.c +++ b/bench/wtperf/misc.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/bench/wtperf/runners/get_ckpt.py b/bench/wtperf/runners/get_ckpt.py index 03bbda7dab1..da188ad47d4 100755 --- a/bench/wtperf/runners/get_ckpt.py +++ b/bench/wtperf/runners/get_ckpt.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/bench/wtperf/track.c b/bench/wtperf/track.c index 86a26120a6a..13ca85aabfd 100644 --- a/bench/wtperf/track.c +++ b/bench/wtperf/track.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 6d79eebe8b2..68bc08226c2 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/bench/wtperf/wtperf.h b/bench/wtperf/wtperf.h index 3efb8ab700e..bd6c1e829ba 100644 --- a/bench/wtperf/wtperf.h +++ b/bench/wtperf/wtperf.h @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/bench/wtperf/wtperf_opt.i b/bench/wtperf/wtperf_opt.i index 90f70457407..b71d93b8cc7 100644 --- a/bench/wtperf/wtperf_opt.i +++ b/bench/wtperf/wtperf_opt.i @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/bench/wtperf/wtperf_throttle.c b/bench/wtperf/wtperf_throttle.c index d104a68175d..75dad09ed50 100644 --- a/bench/wtperf/wtperf_throttle.c +++ b/bench/wtperf/wtperf_throttle.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2015 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/bench/wtperf/wtperf_truncate.c b/bench/wtperf/wtperf_truncate.c index 3fbb740d2c8..5b794009afb 100644 --- a/bench/wtperf/wtperf_truncate.c +++ b/bench/wtperf/wtperf_truncate.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/dist/s_copyright b/dist/s_copyright index 9ff6c20492e..490f7eb6427 100755 --- a/dist/s_copyright +++ b/dist/s_copyright @@ -117,6 +117,8 @@ fi -e '/api\/leveldb\/hyperleveldb\//d' \ -e '/api\/leveldb\/leveldb\//d' \ -e '/api\/leveldb\/rocksdb\//d' \ + -e '/checksum\/power8\//d' \ + -e '/checksum\/zseries\//d' \ -e '/\/3rdparty\//d' \ -e '/\/node_modules\//d' \ -e '/dist\/__/d' \ diff --git a/dist/s_copyright.list b/dist/s_copyright.list index 4999d2a37a2..2db76d9f3c5 100644 --- a/dist/s_copyright.list +++ b/dist/s_copyright.list @@ -9,9 +9,11 @@ skip dist/flags.py skip dist/java_doc.py skip dist/log.py skip dist/log_data.py +skip dist/s_label_loop.py skip dist/stat.py skip dist/stat_data.py skip dist/style.py +skip dist/wtperf_config.py skip lang/java/java_doc.i skip lang/java/src/com/wiredtiger/db/AsyncOp.java skip lang/java/src/com/wiredtiger/db/AsyncOpType.java diff --git a/examples/c/ex_access.c b/examples/c/ex_access.c index d7f3cc557ad..6f24139182d 100644 --- a/examples/c/ex_access.c +++ b/examples/c/ex_access.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c index 82620673fe1..b568d1dd63c 100644 --- a/examples/c/ex_all.c +++ b/examples/c/ex_all.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_async.c b/examples/c/ex_async.c index 5cfafca0418..83cddc2824d 100644 --- a/examples/c/ex_async.c +++ b/examples/c/ex_async.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. 
* * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_backup.c b/examples/c/ex_backup.c index 83cc9b22ecc..ff7d979f286 100644 --- a/examples/c/ex_backup.c +++ b/examples/c/ex_backup.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_call_center.c b/examples/c/ex_call_center.c index cd53a1cdaf9..4483e8b1603 100644 --- a/examples/c/ex_call_center.c +++ b/examples/c/ex_call_center.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_config_parse.c b/examples/c/ex_config_parse.c index 40508b38204..c9720325129 100644 --- a/examples/c/ex_config_parse.c +++ b/examples/c/ex_config_parse.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_cursor.c b/examples/c/ex_cursor.c index b8ed6ab169d..0982aa43073 100644 --- a/examples/c/ex_cursor.c +++ b/examples/c/ex_cursor.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_data_source.c b/examples/c/ex_data_source.c index 387248f6ae2..d40008e0a0e 100644 --- a/examples/c/ex_data_source.c +++ b/examples/c/ex_data_source.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. 
* * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_encrypt.c b/examples/c/ex_encrypt.c index 1520bd286cd..1710d5af16f 100644 --- a/examples/c/ex_encrypt.c +++ b/examples/c/ex_encrypt.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_event_handler.c b/examples/c/ex_event_handler.c index 03809cae7c8..153b44c35bf 100644 --- a/examples/c/ex_event_handler.c +++ b/examples/c/ex_event_handler.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_extending.c b/examples/c/ex_extending.c index f276cdd3e1e..7364fa4bc9e 100644 --- a/examples/c/ex_extending.c +++ b/examples/c/ex_extending.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_extractor.c b/examples/c/ex_extractor.c index f9d7af4af0f..3aaaf90ac90 100644 --- a/examples/c/ex_extractor.c +++ b/examples/c/ex_extractor.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_file_system.c b/examples/c/ex_file_system.c index e807ac54d3b..40f1d66cbc1 100644 --- a/examples/c/ex_file_system.c +++ b/examples/c/ex_file_system.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. 
* * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_hello.c b/examples/c/ex_hello.c index 99534ee8868..616049aaddb 100644 --- a/examples/c/ex_hello.c +++ b/examples/c/ex_hello.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_log.c b/examples/c/ex_log.c index 0d8fbf97233..d4de195ddee 100644 --- a/examples/c/ex_log.c +++ b/examples/c/ex_log.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_pack.c b/examples/c/ex_pack.c index 86725123f55..37b864e62a4 100644 --- a/examples/c/ex_pack.c +++ b/examples/c/ex_pack.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_process.c b/examples/c/ex_process.c index 217730c4288..4bab6a1cd70 100644 --- a/examples/c/ex_process.c +++ b/examples/c/ex_process.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_schema.c b/examples/c/ex_schema.c index a59d9480780..9249ecc1e1a 100644 --- a/examples/c/ex_schema.c +++ b/examples/c/ex_schema.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/examples/c/ex_stat.c b/examples/c/ex_stat.c index cf9e8fb97d1..7097b53a060 100644 --- a/examples/c/ex_stat.c +++ b/examples/c/ex_stat.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_sync.c b/examples/c/ex_sync.c index b2d74b52f7f..c333ac42e1e 100644 --- a/examples/c/ex_sync.c +++ b/examples/c/ex_sync.c @@ -1,5 +1,5 @@ -/* - * Public Domain 2014-2016 MongoDB, Inc. +/*- + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/c/ex_thread.c b/examples/c/ex_thread.c index fa82bd5f113..b69b3e9e7e9 100644 --- a/examples/c/ex_thread.c +++ b/examples/c/ex_thread.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/java/com/wiredtiger/examples/ex_access.java b/examples/java/com/wiredtiger/examples/ex_access.java index 104f86d5545..ed96ebce7d7 100644 --- a/examples/java/com/wiredtiger/examples/ex_access.java +++ b/examples/java/com/wiredtiger/examples/ex_access.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/java/com/wiredtiger/examples/ex_all.java b/examples/java/com/wiredtiger/examples/ex_all.java index cf8491aa4f8..ff7d371fabd 100644 --- a/examples/java/com/wiredtiger/examples/ex_all.java +++ b/examples/java/com/wiredtiger/examples/ex_all.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. 
* Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/java/com/wiredtiger/examples/ex_async.java b/examples/java/com/wiredtiger/examples/ex_async.java index 2e890095b2d..92054464747 100644 --- a/examples/java/com/wiredtiger/examples/ex_async.java +++ b/examples/java/com/wiredtiger/examples/ex_async.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/java/com/wiredtiger/examples/ex_call_center.java b/examples/java/com/wiredtiger/examples/ex_call_center.java index a3f0f56ded8..921c7f9f57c 100644 --- a/examples/java/com/wiredtiger/examples/ex_call_center.java +++ b/examples/java/com/wiredtiger/examples/ex_call_center.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/java/com/wiredtiger/examples/ex_cursor.java b/examples/java/com/wiredtiger/examples/ex_cursor.java index a0a6e48aa46..498ace12865 100644 --- a/examples/java/com/wiredtiger/examples/ex_cursor.java +++ b/examples/java/com/wiredtiger/examples/ex_cursor.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/java/com/wiredtiger/examples/ex_log.java b/examples/java/com/wiredtiger/examples/ex_log.java index 233ad1361d8..5a76c43b13c 100644 --- a/examples/java/com/wiredtiger/examples/ex_log.java +++ b/examples/java/com/wiredtiger/examples/ex_log.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. 
+ * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/java/com/wiredtiger/examples/ex_schema.java b/examples/java/com/wiredtiger/examples/ex_schema.java index 76bff66a688..b7aa64f0c68 100644 --- a/examples/java/com/wiredtiger/examples/ex_schema.java +++ b/examples/java/com/wiredtiger/examples/ex_schema.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/java/com/wiredtiger/examples/ex_stat.java b/examples/java/com/wiredtiger/examples/ex_stat.java index f8877a4620e..799f0396756 100644 --- a/examples/java/com/wiredtiger/examples/ex_stat.java +++ b/examples/java/com/wiredtiger/examples/ex_stat.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/java/com/wiredtiger/examples/ex_thread.java b/examples/java/com/wiredtiger/examples/ex_thread.java index 402daebbd61..2476b3a4d41 100644 --- a/examples/java/com/wiredtiger/examples/ex_thread.java +++ b/examples/java/com/wiredtiger/examples/ex_thread.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/examples/python/ex_access.py b/examples/python/ex_access.py index aa99c1f6547..58ba64607e2 100755 --- a/examples/python/ex_access.py +++ b/examples/python/ex_access.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. 
# # This is free and unencumbered software released into the public domain. diff --git a/examples/python/ex_stat.py b/examples/python/ex_stat.py index 1772badd076..cd99c4f388b 100755 --- a/examples/python/ex_stat.py +++ b/examples/python/ex_stat.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/ext/collators/reverse/reverse_collator.c b/ext/collators/reverse/reverse_collator.c index 7e205f98193..3a589613427 100644 --- a/ext/collators/reverse/reverse_collator.c +++ b/ext/collators/reverse/reverse_collator.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/ext/collators/revint/revint_collator.c b/ext/collators/revint/revint_collator.c index cfad3989adb..9952e922077 100644 --- a/ext/collators/revint/revint_collator.c +++ b/ext/collators/revint/revint_collator.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/ext/compressors/lz4/lz4_compress.c b/ext/compressors/lz4/lz4_compress.c index 885701e564b..7fe72942f1e 100644 --- a/ext/compressors/lz4/lz4_compress.c +++ b/ext/compressors/lz4/lz4_compress.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/ext/compressors/nop/nop_compress.c b/ext/compressors/nop/nop_compress.c index e54013ae8b0..7cdb67c6bf2 100644 --- a/ext/compressors/nop/nop_compress.c +++ b/ext/compressors/nop/nop_compress.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/ext/compressors/snappy/snappy_compress.c b/ext/compressors/snappy/snappy_compress.c index 32f1ddcb9a0..a86de5c3803 100644 --- a/ext/compressors/snappy/snappy_compress.c +++ b/ext/compressors/snappy/snappy_compress.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/ext/compressors/zlib/zlib_compress.c b/ext/compressors/zlib/zlib_compress.c index 09a793646e7..3263b84bfaa 100644 --- a/ext/compressors/zlib/zlib_compress.c +++ b/ext/compressors/zlib/zlib_compress.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/ext/compressors/zstd/zstd_compress.c b/ext/compressors/zstd/zstd_compress.c index ea8ec97602f..d2ebaf20c4e 100644 --- a/ext/compressors/zstd/zstd_compress.c +++ b/ext/compressors/zstd/zstd_compress.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/ext/datasources/helium/helium.c b/ext/datasources/helium/helium.c index c584141b00d..5af954ba1de 100644 --- a/ext/datasources/helium/helium.c +++ b/ext/datasources/helium/helium.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/ext/encryptors/nop/nop_encrypt.c b/ext/encryptors/nop/nop_encrypt.c index af65f397549..3bc0f0f1c71 100644 --- a/ext/encryptors/nop/nop_encrypt.c +++ b/ext/encryptors/nop/nop_encrypt.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/ext/encryptors/rotn/rotn_encrypt.c b/ext/encryptors/rotn/rotn_encrypt.c index 0b905a0540d..5ffc8fcc1a3 100644 --- a/ext/encryptors/rotn/rotn_encrypt.c +++ b/ext/encryptors/rotn/rotn_encrypt.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/ext/extractors/csv/csv_extractor.c b/ext/extractors/csv/csv_extractor.c index e47ce6e2255..9866e1d5b34 100644 --- a/ext/extractors/csv/csv_extractor.c +++ b/ext/extractors/csv/csv_extractor.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/ext/test/fail_fs/fail_fs.c b/ext/test/fail_fs/fail_fs.c index d0d8a14c8c2..b4add92be94 100644 --- a/ext/test/fail_fs/fail_fs.c +++ b/ext/test/fail_fs/fail_fs.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. 
* Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/ext/test/kvs_bdb/kvs_bdb.c b/ext/test/kvs_bdb/kvs_bdb.c index 0791b077750..8f857285b2b 100644 --- a/ext/test/kvs_bdb/kvs_bdb.c +++ b/ext/test/kvs_bdb/kvs_bdb.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/lang/java/src/com/wiredtiger/db/AsyncCallback.java b/lang/java/src/com/wiredtiger/db/AsyncCallback.java index ff428fae4fd..b272d611255 100644 --- a/lang/java/src/com/wiredtiger/db/AsyncCallback.java +++ b/lang/java/src/com/wiredtiger/db/AsyncCallback.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java b/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java index 4f05e153607..5cf52a067b8 100644 --- a/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java +++ b/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/lang/java/src/com/wiredtiger/db/PackInputStream.java b/lang/java/src/com/wiredtiger/db/PackInputStream.java index 732bf450acd..013f9601edb 100644 --- a/lang/java/src/com/wiredtiger/db/PackInputStream.java +++ b/lang/java/src/com/wiredtiger/db/PackInputStream.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. 
* * This is free and unencumbered software released into the public domain. diff --git a/lang/java/src/com/wiredtiger/db/PackOutputStream.java b/lang/java/src/com/wiredtiger/db/PackOutputStream.java index b6804a2992f..9af63db83c9 100644 --- a/lang/java/src/com/wiredtiger/db/PackOutputStream.java +++ b/lang/java/src/com/wiredtiger/db/PackOutputStream.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/lang/java/src/com/wiredtiger/db/PackUtil.java b/lang/java/src/com/wiredtiger/db/PackUtil.java index d47119eaf30..43b627cbf15 100644 --- a/lang/java/src/com/wiredtiger/db/PackUtil.java +++ b/lang/java/src/com/wiredtiger/db/PackUtil.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/lang/java/src/com/wiredtiger/db/WiredTigerException.java b/lang/java/src/com/wiredtiger/db/WiredTigerException.java index 13481efd9e4..233e1598c5d 100644 --- a/lang/java/src/com/wiredtiger/db/WiredTigerException.java +++ b/lang/java/src/com/wiredtiger/db/WiredTigerException.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/lang/java/src/com/wiredtiger/db/WiredTigerPackingException.java b/lang/java/src/com/wiredtiger/db/WiredTigerPackingException.java index 7dd1cfc24be..73d279f9e85 100644 --- a/lang/java/src/com/wiredtiger/db/WiredTigerPackingException.java +++ b/lang/java/src/com/wiredtiger/db/WiredTigerPackingException.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. 
* Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/lang/java/src/com/wiredtiger/db/WiredTigerPanicException.java b/lang/java/src/com/wiredtiger/db/WiredTigerPanicException.java index 8c0e08a77fb..4d82ad3d5df 100644 --- a/lang/java/src/com/wiredtiger/db/WiredTigerPanicException.java +++ b/lang/java/src/com/wiredtiger/db/WiredTigerPanicException.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/lang/java/src/com/wiredtiger/db/WiredTigerRollbackException.java b/lang/java/src/com/wiredtiger/db/WiredTigerRollbackException.java index 47d079c139d..84c7e0803a3 100644 --- a/lang/java/src/com/wiredtiger/db/WiredTigerRollbackException.java +++ b/lang/java/src/com/wiredtiger/db/WiredTigerRollbackException.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/lang/java/wiredtiger.i b/lang/java/wiredtiger.i index 275b708090c..d6fc5fc8b9a 100644 --- a/lang/java/wiredtiger.i +++ b/lang/java/wiredtiger.i @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/lang/python/setup.py b/lang/python/setup.py index 9063a891fb9..c88b268fcff 100644 --- a/lang/python/setup.py +++ b/lang/python/setup.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/lang/python/setup_pip.py b/lang/python/setup_pip.py index 636eecab80a..2ddca407e6b 100644 --- a/lang/python/setup_pip.py +++ b/lang/python/setup_pip.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/lang/python/wiredtiger.i b/lang/python/wiredtiger.i index 7bc84066d64..7a297312bb8 100644 --- a/lang/python/wiredtiger.i +++ b/lang/python/wiredtiger.i @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/lang/python/wiredtiger/fpacking.py b/lang/python/wiredtiger/fpacking.py index cc009a29764..8ae4c1cf99f 100644 --- a/lang/python/wiredtiger/fpacking.py +++ b/lang/python/wiredtiger/fpacking.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/lang/python/wiredtiger/intpacking.py b/lang/python/wiredtiger/intpacking.py index 023c25ab5b3..ed1f00ceb37 100644 --- a/lang/python/wiredtiger/intpacking.py +++ b/lang/python/wiredtiger/intpacking.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/lang/python/wiredtiger/packing.py b/lang/python/wiredtiger/packing.py index 5d21b539888..0506f2afda9 100644 --- a/lang/python/wiredtiger/packing.py +++ b/lang/python/wiredtiger/packing.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. 
+# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/lang/python/wiredtiger/pip_init.py b/lang/python/wiredtiger/pip_init.py index d59c8218976..71c35fabd57 100644 --- a/lang/python/wiredtiger/pip_init.py +++ b/lang/python/wiredtiger/pip_init.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/src/async/async_api.c b/src/async/async_api.c index b9cc995f5a5..ef3af8d15d3 100644 --- a/src/async/async_api.c +++ b/src/async/async_api.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/async/async_op.c b/src/async/async_op.c index 6908802dbff..d4ca754b95f 100644 --- a/src/async/async_op.c +++ b/src/async/async_op.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/async/async_worker.c b/src/async/async_worker.c index 11f59ed14f1..5e96bb84922 100644 --- a/src/async/async_worker.c +++ b/src/async/async_worker.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/block/block_addr.c b/src/block/block_addr.c index a67efca62a3..6a016776175 100644 --- a/src/block/block_addr.c +++ b/src/block/block_addr.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/block/block_ckpt.c b/src/block/block_ckpt.c index 05e4dcc098e..c20a294c07b 100644 --- a/src/block/block_ckpt.c +++ b/src/block/block_ckpt.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/block/block_compact.c b/src/block/block_compact.c index eb6647dd03c..2ca167f97a4 100644 --- a/src/block/block_compact.c +++ b/src/block/block_compact.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/block/block_ext.c b/src/block/block_ext.c index da7a06d873d..0382e6b92aa 100644 --- a/src/block/block_ext.c +++ b/src/block/block_ext.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/block/block_map.c b/src/block/block_map.c index b7afa61cc55..847f2393043 100644 --- a/src/block/block_map.c +++ b/src/block/block_map.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c index 653ae3dbb6b..d09d7e7925c 100644 --- a/src/block/block_mgr.c +++ b/src/block/block_mgr.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/block/block_open.c b/src/block/block_open.c index 07ceb4c8159..d35a934b0f3 100644 --- a/src/block/block_open.c +++ b/src/block/block_open.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/block/block_read.c b/src/block/block_read.c index 8d4aec7df75..86b0cad13db 100644 --- a/src/block/block_read.c +++ b/src/block/block_read.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/block/block_session.c b/src/block/block_session.c index 6223751effa..e951897e25d 100644 --- a/src/block/block_session.c +++ b/src/block/block_session.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/block/block_slvg.c b/src/block/block_slvg.c index b06a5062f50..888d93772a2 100644 --- a/src/block/block_slvg.c +++ b/src/block/block_slvg.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/block/block_vrfy.c b/src/block/block_vrfy.c index 154765ed079..1058f16bde6 100644 --- a/src/block/block_vrfy.c +++ b/src/block/block_vrfy.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/block/block_write.c b/src/block/block_write.c index ea7859d6a38..7d689fc9bcf 100644 --- a/src/block/block_write.c +++ b/src/block/block_write.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/bloom/bloom.c b/src/bloom/bloom.c index b8d75678835..3a1e861fb5d 100644 --- a/src/bloom/bloom.c +++ b/src/bloom/bloom.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c index 2edcac76d0b..e7edae5ea79 100644 --- a/src/btree/bt_compact.c +++ b/src/btree/bt_compact.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c index 21e575ffca9..091b9345713 100644 --- a/src/btree/bt_curnext.c +++ b/src/btree/bt_curnext.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c index bf4bdad6529..4f0fa77d3e6 100644 --- a/src/btree/bt_curprev.c +++ b/src/btree/bt_curprev.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 944e276fc01..408a00f136d 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index d3f02e29b90..60e208c171c 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_delete.c b/src/btree/bt_delete.c index b55ad291c5e..12c3b044fda 100644 --- a/src/btree/bt_delete.c +++ b/src/btree/bt_delete.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index bab7b8145d6..a04face8f64 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index d76720b19ae..40e7a601d47 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_huffman.c b/src/btree/bt_huffman.c index 918791d9c6e..c5cc9ccf0b0 100644 --- a/src/btree/bt_huffman.c +++ b/src/btree/bt_huffman.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_io.c b/src/btree/bt_io.c index b5e4d52394a..262532a4eab 100644 --- a/src/btree/bt_io.c +++ b/src/btree/bt_io.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_misc.c b/src/btree/bt_misc.c index 3bec65c2567..04b607082d1 100644 --- a/src/btree/bt_misc.c +++ b/src/btree/bt_misc.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_ovfl.c b/src/btree/bt_ovfl.c index ae0da62af57..3d09f655c65 100644 --- a/src/btree/bt_ovfl.c +++ b/src/btree/bt_ovfl.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c index f20f6398e37..ca5f05fe3dc 100644 --- a/src/btree/bt_page.c +++ b/src/btree/bt_page.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_random.c b/src/btree/bt_random.c index c5948ec4ab5..90780b05cab 100644 --- a/src/btree/bt_random.c +++ b/src/btree/bt_random.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index 64874547b9c..8de0d916095 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_rebalance.c b/src/btree/bt_rebalance.c index 68848c7c8f5..47c7888af35 100644 --- a/src/btree/bt_rebalance.c +++ b/src/btree/bt_rebalance.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_ret.c b/src/btree/bt_ret.c index f17fa1b85d1..a761c08eee6 100644 --- a/src/btree/bt_ret.c +++ b/src/btree/bt_ret.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c index 165f932afb2..eb39301abc7 100644 --- a/src/btree/bt_slvg.c +++ b/src/btree/bt_slvg.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 6f03da161cd..db3e3f33abf 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c index 0da0e0807bd..aba5b1349c5 100644 --- a/src/btree/bt_stat.c +++ b/src/btree/bt_stat.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index ead6ccc4ac0..81e9d1757bb 100644 --- a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_upgrade.c b/src/btree/bt_upgrade.c index a9ff16ad496..a7fe3283218 100644 --- a/src/btree/bt_upgrade.c +++ b/src/btree/bt_upgrade.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_vrfy.c b/src/btree/bt_vrfy.c index 7475811adc5..21ba2d7a715 100644 --- a/src/btree/bt_vrfy.c +++ b/src/btree/bt_vrfy.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/bt_vrfy_dsk.c b/src/btree/bt_vrfy_dsk.c index a4071c44aee..55c96bbed55 100644 --- a/src/btree/bt_vrfy_dsk.c +++ b/src/btree/bt_vrfy_dsk.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c index 86484feb7c9..8432707fc31 100644 --- a/src/btree/bt_walk.c +++ b/src/btree/bt_walk.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c index 9ccb9728189..5b4ce9a86ad 100644 --- a/src/btree/col_modify.c +++ b/src/btree/col_modify.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/col_srch.c b/src/btree/col_srch.c index c72d66f8796..78ee367dc69 100644 --- a/src/btree/col_srch.c +++ b/src/btree/col_srch.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/row_key.c b/src/btree/row_key.c index 032fdf7d897..a455a6acace 100644 --- a/src/btree/row_key.c +++ b/src/btree/row_key.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c index b1a81ca3d9f..2d1e4902836 100644 --- a/src/btree/row_modify.c +++ b/src/btree/row_modify.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c index 9c3d467340e..76bebde7de7 100644 --- a/src/btree/row_srch.c +++ b/src/btree/row_srch.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c index 41da4225f3a..68d7227a762 100644 --- a/src/cache/cache_las.c +++ b/src/cache/cache_las.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/checksum/arm64/crc32-arm64.c b/src/checksum/arm64/crc32-arm64.c index 38b4f623044..3584b296139 100644 --- a/src/checksum/arm64/crc32-arm64.c +++ b/src/checksum/arm64/crc32-arm64.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/src/checksum/software/checksum.c b/src/checksum/software/checksum.c index 65ed74bbe06..a880d38894c 100644 --- a/src/checksum/software/checksum.c +++ b/src/checksum/software/checksum.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/src/checksum/x86/crc32-x86.c b/src/checksum/x86/crc32-x86.c index 82814ecc34d..1c2c08fa1c1 100644 --- a/src/checksum/x86/crc32-x86.c +++ b/src/checksum/x86/crc32-x86.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/src/config/config.c b/src/config/config.c index a47dfe76aec..33eb988fc5a 100644 --- a/src/config/config.c +++ b/src/config/config.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/config/config_api.c b/src/config/config_api.c index c3e590a171a..31efb278d2a 100644 --- a/src/config/config_api.c +++ b/src/config/config_api.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/config/config_check.c b/src/config/config_check.c index 2f372651cb9..8038ae89413 100644 --- a/src/config/config_check.c +++ b/src/config/config_check.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/config/config_collapse.c b/src/config/config_collapse.c index 5abe7556a03..155b700f2dd 100644 --- a/src/config/config_collapse.c +++ b/src/config/config_collapse.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/config/config_ext.c b/src/config/config_ext.c index 88f1390843a..d9e3771c707 100644 --- a/src/config/config_ext.c +++ b/src/config/config_ext.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/config/config_upgrade.c b/src/config/config_upgrade.c index e9ba38c6693..5f2770b1691 100644 --- a/src/config/config_upgrade.c +++ b/src/config/config_upgrade.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/conn/api_version.c b/src/conn/api_version.c index a36cdb8d8eb..c4f3d978c1e 100644 --- a/src/conn/api_version.c +++ b/src/conn/api_version.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. 
* Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 68d45678965..44333ceec3f 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c index 28dd06332e0..ad83f0b2b4a 100644 --- a/src/conn/conn_cache.c +++ b/src/conn/conn_cache.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c index ed078991581..4eee206c241 100644 --- a/src/conn/conn_cache_pool.c +++ b/src/conn/conn_cache_pool.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/conn/conn_ckpt.c b/src/conn/conn_ckpt.c index 7797ed4421c..43673cd335e 100644 --- a/src/conn/conn_ckpt.c +++ b/src/conn/conn_ckpt.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 657cdebf7ee..e342e142039 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index 1266029671e..11b5368e9ad 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. 
* Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index b8b5bd2a908..a24dd170093 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c index 2ec1002331a..ab7253c2828 100644 --- a/src/conn/conn_open.c +++ b/src/conn/conn_open.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c index 1247fae84ac..2554083b26c 100644 --- a/src/conn/conn_stat.c +++ b/src/conn/conn_stat.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index 22d90b08438..4ad4050dd9e 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c index 61ced8d11e7..52636e6ba61 100644 --- a/src/cursor/cur_backup.c +++ b/src/cursor/cur_backup.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/cursor/cur_bulk.c b/src/cursor/cur_bulk.c index 68611e30ff1..b9c190cc962 100644 --- a/src/cursor/cur_bulk.c +++ b/src/cursor/cur_bulk.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. 
* All rights reserved. * diff --git a/src/cursor/cur_config.c b/src/cursor/cur_config.c index 4001188e21c..8a88bb5449c 100644 --- a/src/cursor/cur_config.c +++ b/src/cursor/cur_config.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/cursor/cur_ds.c b/src/cursor/cur_ds.c index 131d1ffa930..9615d25ae18 100644 --- a/src/cursor/cur_ds.c +++ b/src/cursor/cur_ds.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/cursor/cur_dump.c b/src/cursor/cur_dump.c index d7f18bb61ac..2fea6c5a60e 100644 --- a/src/cursor/cur_dump.c +++ b/src/cursor/cur_dump.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index 205afb607c3..8de33420d17 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/cursor/cur_index.c b/src/cursor/cur_index.c index 6fc01c0421f..9e2a19f9709 100644 --- a/src/cursor/cur_index.c +++ b/src/cursor/cur_index.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 80afaf798dc..62069321777 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/cursor/cur_json.c b/src/cursor/cur_json.c index e8ddb767863..99b4fc1ce4f 100644 --- a/src/cursor/cur_json.c +++ b/src/cursor/cur_json.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/cursor/cur_log.c b/src/cursor/cur_log.c index e5b56aa406f..36804213dbf 100644 --- a/src/cursor/cur_log.c +++ b/src/cursor/cur_log.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/cursor/cur_metadata.c b/src/cursor/cur_metadata.c index fbfc73956e2..284c823768d 100644 --- a/src/cursor/cur_metadata.c +++ b/src/cursor/cur_metadata.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c index 0bff642370d..1c543023779 100644 --- a/src/cursor/cur_stat.c +++ b/src/cursor/cur_stat.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c index 99a9e373354..692f1aa957b 100644 --- a/src/cursor/cur_std.c +++ b/src/cursor/cur_std.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c index 3b72bb0730f..78264c4804d 100644 --- a/src/cursor/cur_table.c +++ b/src/cursor/cur_table.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/docs/build-javadoc.sh b/src/docs/build-javadoc.sh index be886937070..69cb1186467 100755 --- a/src/docs/build-javadoc.sh +++ b/src/docs/build-javadoc.sh @@ -8,5 +8,5 @@ CLASSPATH=$THRIFT_HOME/libthrift.jar:$SLF4J_JAR javadoc -public -d $DOCS/java \ -stylesheetfile $DOCS/style/javadoc.css \ -use -link http://java.sun.com/j2se/1.5.0/docs/api/ \ -header 'WiredTiger API
version '$WT_VERSION'' \ - -windowtitle 'WiredTiger Java API' -bottom 'Copyright (c) 2008-2016 MongoDB, Inc. All rights reserved.' \ + -windowtitle 'WiredTiger Java API' -bottom 'Copyright (c) 2008-2017 MongoDB, Inc. All rights reserved.' \ com.wiredtiger com.wiredtiger.util diff --git a/src/docs/style/footer.html b/src/docs/style/footer.html index e5a7b30eef5..12d25422f89 100644 --- a/src/docs/style/footer.html +++ b/src/docs/style/footer.html @@ -3,13 +3,13 @@ diff --git a/src/docs/tools/doxfilter.py b/src/docs/tools/doxfilter.py index f1c3308c689..301142269c3 100755 --- a/src/docs/tools/doxfilter.py +++ b/src/docs/tools/doxfilter.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/src/docs/tools/fixlinks.py b/src/docs/tools/fixlinks.py index 7163246e3bd..1887665d5be 100755 --- a/src/docs/tools/fixlinks.py +++ b/src/docs/tools/fixlinks.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/src/evict/evict_file.c b/src/evict/evict_file.c index 3d8f4a61ca7..56638934305 100644 --- a/src/evict/evict_file.c +++ b/src/evict/evict_file.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index bd5ae261eaf..8b003cd099e 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index 85689efd0b1..9498e2fb313 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/evict/evict_stat.c b/src/evict/evict_stat.c index 7c2d5722a63..276e737ebbb 100644 --- a/src/evict/evict_stat.c +++ b/src/evict/evict_stat.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/api.h b/src/include/api.h index a3636eb8040..994855e6087 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/async.h b/src/include/async.h index 7a415a4a17a..53a7d982ba5 100644 --- a/src/include/async.h +++ b/src/include/async.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/bitstring.i b/src/include/bitstring.i index 118dc0bba01..d3dc3bebd0f 100644 --- a/src/include/bitstring.i +++ b/src/include/bitstring.i @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/src/include/block.h b/src/include/block.h index 0cf76e34367..b93ed948eee 100644 --- a/src/include/block.h +++ b/src/include/block.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/include/bloom.h b/src/include/bloom.h index ddc2d64a118..a0efc0bf1fa 100644 --- a/src/include/bloom.h +++ b/src/include/bloom.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/btmem.h b/src/include/btmem.h index 30243d2ae18..d29612143ce 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/btree.h b/src/include/btree.h index 28fe1b94b23..19db27d84a2 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/btree.i b/src/include/btree.i index dcc73f51f1b..5e06dd36ae8 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/btree_cmp.i b/src/include/btree_cmp.i index 23a462e4e50..c1354a7ea4b 100644 --- a/src/include/btree_cmp.i +++ b/src/include/btree_cmp.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/buf.i b/src/include/buf.i index d192e292dcf..17f67afefce 100644 --- a/src/include/buf.i +++ b/src/include/buf.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/include/cache.h b/src/include/cache.h index 04920c3585a..6e79c2a5868 100644 --- a/src/include/cache.h +++ b/src/include/cache.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/cache.i b/src/include/cache.i index d71978ccf35..899507ceae6 100644 --- a/src/include/cache.i +++ b/src/include/cache.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/cell.i b/src/include/cell.i index 71c2515daf0..0dbf29d21c3 100644 --- a/src/include/cell.i +++ b/src/include/cell.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/column.i b/src/include/column.i index 07b627315e6..c95d338f980 100644 --- a/src/include/column.i +++ b/src/include/column.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/compact.h b/src/include/compact.h index 96797f6b275..d74090c286c 100644 --- a/src/include/compact.h +++ b/src/include/compact.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/config.h b/src/include/config.h index f2746fc76d9..1f21693511b 100644 --- a/src/include/config.h +++ b/src/include/config.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/include/connection.h b/src/include/connection.h index 7e4ef212b82..b75e2fe1e58 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/ctype.i b/src/include/ctype.i index b4a1ad9f318..3855ae653a5 100644 --- a/src/include/ctype.i +++ b/src/include/ctype.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/cursor.h b/src/include/cursor.h index f32b4250d30..d905197adc1 100644 --- a/src/include/cursor.h +++ b/src/include/cursor.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/cursor.i b/src/include/cursor.i index 12044e0e228..f65364d304c 100644 --- a/src/include/cursor.i +++ b/src/include/cursor.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/dhandle.h b/src/include/dhandle.h index 8861e96112b..0db59d45691 100644 --- a/src/include/dhandle.h +++ b/src/include/dhandle.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/dlh.h b/src/include/dlh.h index 9e49c2ff3cb..d02523b03d1 100644 --- a/src/include/dlh.h +++ b/src/include/dlh.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/include/error.h b/src/include/error.h index c338acb370f..16f916586cc 100644 --- a/src/include/error.h +++ b/src/include/error.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/gcc.h b/src/include/gcc.h index 22d78fc165a..684d093bbbc 100644 --- a/src/include/gcc.h +++ b/src/include/gcc.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/hardware.h b/src/include/hardware.h index 2530659db21..3ff198be3c7 100644 --- a/src/include/hardware.h +++ b/src/include/hardware.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/intpack.i b/src/include/intpack.i index a534de9d9a8..51e43b21321 100644 --- a/src/include/intpack.i +++ b/src/include/intpack.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/lint.h b/src/include/lint.h index 2d0f47988b7..82474b68d11 100644 --- a/src/include/lint.h +++ b/src/include/lint.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/log.h b/src/include/log.h index fb3c961417f..f80514a3546 100644 --- a/src/include/log.h +++ b/src/include/log.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/include/log.i b/src/include/log.i index 9e6c36291f7..8c7e5dc65e8 100644 --- a/src/include/log.i +++ b/src/include/log.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/lsm.h b/src/include/lsm.h index e3f6897ef9d..08313438eb8 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/meta.h b/src/include/meta.h index 68ac2e339d0..2dd77157caa 100644 --- a/src/include/meta.h +++ b/src/include/meta.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/misc.h b/src/include/misc.h index 9161a215fdc..c982b74a858 100644 --- a/src/include/misc.h +++ b/src/include/misc.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/misc.i b/src/include/misc.i index 7040886cf82..634cc01f893 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/msvc.h b/src/include/msvc.h index 6c5c8b67647..74a81296dfb 100644 --- a/src/include/msvc.h +++ b/src/include/msvc.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/include/mutex.h b/src/include/mutex.h index 910eb7af5b9..c0e25ebb295 100644 --- a/src/include/mutex.h +++ b/src/include/mutex.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/mutex.i b/src/include/mutex.i index 2d483972ed2..44b8494cdbf 100644 --- a/src/include/mutex.i +++ b/src/include/mutex.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/os.h b/src/include/os.h index 73d89268392..ec1860d19a6 100644 --- a/src/include/os.h +++ b/src/include/os.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/os_fhandle.i b/src/include/os_fhandle.i index 428b14556d9..e5177e64b57 100644 --- a/src/include/os_fhandle.i +++ b/src/include/os_fhandle.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/os_fs.i b/src/include/os_fs.i index 4cf1128280e..c81d3f5dec6 100644 --- a/src/include/os_fs.i +++ b/src/include/os_fs.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/os_fstream.i b/src/include/os_fstream.i index 98d0622f346..1561274b388 100644 --- a/src/include/os_fstream.i +++ b/src/include/os_fstream.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/include/os_windows.h b/src/include/os_windows.h index c1e5f788dc6..764ade9328c 100644 --- a/src/include/os_windows.h +++ b/src/include/os_windows.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/packing.i b/src/include/packing.i index 0eadb2f2027..6d302020f1e 100644 --- a/src/include/packing.i +++ b/src/include/packing.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/posix.h b/src/include/posix.h index 2593c7b6797..aaa88cde4be 100644 --- a/src/include/posix.h +++ b/src/include/posix.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/schema.h b/src/include/schema.h index 1f12116e39f..fa836084834 100644 --- a/src/include/schema.h +++ b/src/include/schema.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/serial.i b/src/include/serial.i index 982f196b0b8..18ff0bb7ec2 100644 --- a/src/include/serial.i +++ b/src/include/serial.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/session.h b/src/include/session.h index fca67fdbde6..adef5e39068 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/include/stat.h b/src/include/stat.h index 6c274484bcb..5603e404e13 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/swap.h b/src/include/swap.h index 2040ca88a77..bd28296e668 100644 --- a/src/include/swap.h +++ b/src/include/swap.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/thread_group.h b/src/include/thread_group.h index 031af8ca5c3..7375f9dfd87 100644 --- a/src/include/thread_group.h +++ b/src/include/thread_group.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/txn.h b/src/include/txn.h index 01d94e2f731..c1f19ada959 100644 --- a/src/include/txn.h +++ b/src/include/txn.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/txn.i b/src/include/txn.i index 8c4cb4a8ab8..3c096f34b2e 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/verify_build.h b/src/include/verify_build.h index 640f5e4cf5f..d2ccf206990 100644 --- a/src/include/verify_build.h +++ b/src/include/verify_build.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index ddecb2ac765..ec2bdd3b637 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/wiredtiger_ext.h b/src/include/wiredtiger_ext.h index 236d4e07e67..bc61c43e29d 100644 --- a/src/include/wiredtiger_ext.h +++ b/src/include/wiredtiger_ext.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h index 27f732c6539..e250cfc33ba 100644 --- a/src/include/wt_internal.h +++ b/src/include/wt_internal.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/log/log.c b/src/log/log.c index 803d3e8dfab..c0076951e86 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/log/log_slot.c b/src/log/log_slot.c index 97e317ce68c..c92929c91b7 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 52265f02e62..8459259dae7 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/lsm/lsm_cursor_bulk.c b/src/lsm/lsm_cursor_bulk.c index 7a6a40e380f..ba5f04c7697 100644 --- a/src/lsm/lsm_cursor_bulk.c +++ b/src/lsm/lsm_cursor_bulk.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index e33e119aa41..d9726ac419b 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index 8838638f388..882dfa86a18 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/lsm/lsm_meta.c b/src/lsm/lsm_meta.c index fc4dde82470..66ad24dee5b 100644 --- a/src/lsm/lsm_meta.c +++ b/src/lsm/lsm_meta.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/lsm/lsm_stat.c b/src/lsm/lsm_stat.c index 411655878af..63b9e6c6d14 100644 --- a/src/lsm/lsm_stat.c +++ b/src/lsm/lsm_stat.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index a9275976023..115ec2acc8a 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index e6a29666094..1b789b87d2a 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index 1cabbd4888d..ba6adf37cce 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/meta/meta_apply.c b/src/meta/meta_apply.c index dc93180a5e5..9fb70dac081 100644 --- a/src/meta/meta_apply.c +++ b/src/meta/meta_apply.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/meta/meta_ckpt.c b/src/meta/meta_ckpt.c index 151bbe0e081..0e96c4ee6ca 100644 --- a/src/meta/meta_ckpt.c +++ b/src/meta/meta_ckpt.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/meta/meta_ext.c b/src/meta/meta_ext.c index aa1ea8b974d..b1d1d2be28f 100644 --- a/src/meta/meta_ext.c +++ b/src/meta/meta_ext.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c index aca69d0e6a2..827a440073a 100644 --- a/src/meta/meta_table.c +++ b/src/meta/meta_table.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/meta/meta_track.c b/src/meta/meta_track.c index 460b615b267..fe7b467c199 100644 --- a/src/meta/meta_track.c +++ b/src/meta/meta_track.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/meta/meta_turtle.c b/src/meta/meta_turtle.c index 5a089471059..7a99df6b83b 100644 --- a/src/meta/meta_turtle.c +++ b/src/meta/meta_turtle.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_common/filename.c b/src/os_common/filename.c index d5695f63d91..16825410dc3 100644 --- a/src/os_common/filename.c +++ b/src/os_common/filename.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_common/os_abort.c b/src/os_common/os_abort.c index 034eedcfbf8..905f3160acf 100644 --- a/src/os_common/os_abort.c +++ b/src/os_common/os_abort.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_common/os_alloc.c b/src/os_common/os_alloc.c index ef96ed09ea7..2f31316d826 100644 --- a/src/os_common/os_alloc.c +++ b/src/os_common/os_alloc.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_common/os_errno.c b/src/os_common/os_errno.c index 7ac89536e79..d88d06d7610 100644 --- a/src/os_common/os_errno.c +++ b/src/os_common/os_errno.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. 
* All rights reserved. * diff --git a/src/os_common/os_fhandle.c b/src/os_common/os_fhandle.c index 3fd5b5db773..2a67447f8d2 100644 --- a/src/os_common/os_fhandle.c +++ b/src/os_common/os_fhandle.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_common/os_fs_inmemory.c b/src/os_common/os_fs_inmemory.c index 1670e97be45..3ea25530aef 100644 --- a/src/os_common/os_fs_inmemory.c +++ b/src/os_common/os_fs_inmemory.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_common/os_fstream.c b/src/os_common/os_fstream.c index 744da732d84..2fe11b92dd0 100644 --- a/src/os_common/os_fstream.c +++ b/src/os_common/os_fstream.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_common/os_fstream_stdio.c b/src/os_common/os_fstream_stdio.c index 0cc75e109a1..82e82b5f3e5 100644 --- a/src/os_common/os_fstream_stdio.c +++ b/src/os_common/os_fstream_stdio.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_common/os_getopt.c b/src/os_common/os_getopt.c index 960776c3999..26b3b11a1f7 100644 --- a/src/os_common/os_getopt.c +++ b/src/os_common/os_getopt.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/src/os_common/os_strtouq.c b/src/os_common/os_strtouq.c index cb4da0de058..1cedfbdcb08 100644 --- a/src/os_common/os_strtouq.c +++ b/src/os_common/os_strtouq.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_posix/os_dir.c b/src/os_posix/os_dir.c index 627278540d1..ca11ce918ad 100644 --- a/src/os_posix/os_dir.c +++ b/src/os_posix/os_dir.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_posix/os_dlopen.c b/src/os_posix/os_dlopen.c index ad1fcc90150..154b15a886c 100644 --- a/src/os_posix/os_dlopen.c +++ b/src/os_posix/os_dlopen.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_posix/os_fallocate.c b/src/os_posix/os_fallocate.c index 111f6558816..5c57c5964b5 100644 --- a/src/os_posix/os_fallocate.c +++ b/src/os_posix/os_fallocate.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_posix/os_fs.c b/src/os_posix/os_fs.c index bc8cbf67025..d0391537543 100644 --- a/src/os_posix/os_fs.c +++ b/src/os_posix/os_fs.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/src/os_posix/os_getenv.c b/src/os_posix/os_getenv.c index f779f90acee..5b5a52cb273 100644 --- a/src/os_posix/os_getenv.c +++ b/src/os_posix/os_getenv.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. 
* Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_posix/os_map.c b/src/os_posix/os_map.c index 91ccc04ff7e..d8aaf5f591f 100644 --- a/src/os_posix/os_map.c +++ b/src/os_posix/os_map.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_posix/os_mtx_cond.c b/src/os_posix/os_mtx_cond.c index a5ee78f9e3e..10606e8108e 100644 --- a/src/os_posix/os_mtx_cond.c +++ b/src/os_posix/os_mtx_cond.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_posix/os_once.c b/src/os_posix/os_once.c index 8d900042330..d2913997711 100644 --- a/src/os_posix/os_once.c +++ b/src/os_posix/os_once.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_posix/os_pagesize.c b/src/os_posix/os_pagesize.c index 4a7e7084cc6..09c52c41fe5 100644 --- a/src/os_posix/os_pagesize.c +++ b/src/os_posix/os_pagesize.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_posix/os_path.c b/src/os_posix/os_path.c index 6dc54675eb8..fc1a0fd4910 100644 --- a/src/os_posix/os_path.c +++ b/src/os_posix/os_path.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_posix/os_priv.c b/src/os_posix/os_priv.c index 5ffbbf7a1f2..0e0f5dfb190 100644 --- a/src/os_posix/os_priv.c +++ b/src/os_posix/os_priv.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. 
+ * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_posix/os_setvbuf.c b/src/os_posix/os_setvbuf.c index ac3958be22f..a916ef79311 100644 --- a/src/os_posix/os_setvbuf.c +++ b/src/os_posix/os_setvbuf.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_posix/os_sleep.c b/src/os_posix/os_sleep.c index 2c60987ced7..a0545d3f5fe 100644 --- a/src/os_posix/os_sleep.c +++ b/src/os_posix/os_sleep.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_posix/os_snprintf.c b/src/os_posix/os_snprintf.c index 390e2e0334a..3ac0183f5ec 100644 --- a/src/os_posix/os_snprintf.c +++ b/src/os_posix/os_snprintf.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_posix/os_thread.c b/src/os_posix/os_thread.c index 18e4c347436..d03d0203de6 100644 --- a/src/os_posix/os_thread.c +++ b/src/os_posix/os_thread.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_posix/os_time.c b/src/os_posix/os_time.c index 6f150ee8ffe..7a1d25427d0 100644 --- a/src/os_posix/os_time.c +++ b/src/os_posix/os_time.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/os_posix/os_yield.c b/src/os_posix/os_yield.c index f7c43aae746..3190e9e7062 100644 --- a/src/os_posix/os_yield.c +++ b/src/os_posix/os_yield.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_win/os_dir.c b/src/os_win/os_dir.c index 47d4f95b793..69235659f04 100644 --- a/src/os_win/os_dir.c +++ b/src/os_win/os_dir.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_win/os_dlopen.c b/src/os_win/os_dlopen.c index 6857be2a05e..9ee4d703c7a 100644 --- a/src/os_win/os_dlopen.c +++ b/src/os_win/os_dlopen.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c index 5cf47ea5763..6cbb89ba37d 100644 --- a/src/os_win/os_fs.c +++ b/src/os_win/os_fs.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_win/os_getenv.c b/src/os_win/os_getenv.c index fe228328ee6..b7b7f765656 100644 --- a/src/os_win/os_getenv.c +++ b/src/os_win/os_getenv.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_win/os_map.c b/src/os_win/os_map.c index a03e6cc3e52..c0aa6dac28f 100644 --- a/src/os_win/os_map.c +++ b/src/os_win/os_map.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/os_win/os_mtx_cond.c b/src/os_win/os_mtx_cond.c index 0001c6c2322..2002d1e925c 100644 --- a/src/os_win/os_mtx_cond.c +++ b/src/os_win/os_mtx_cond.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_win/os_once.c b/src/os_win/os_once.c index 347d1883cca..dd21c58b8af 100644 --- a/src/os_win/os_once.c +++ b/src/os_win/os_once.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_win/os_pagesize.c b/src/os_win/os_pagesize.c index 648105c0e7c..07b1c3afc5c 100644 --- a/src/os_win/os_pagesize.c +++ b/src/os_win/os_pagesize.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_win/os_path.c b/src/os_win/os_path.c index 74050600417..78ad3bda509 100644 --- a/src/os_win/os_path.c +++ b/src/os_win/os_path.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_win/os_priv.c b/src/os_win/os_priv.c index 8c1f3893920..acc3793255a 100644 --- a/src/os_win/os_priv.c +++ b/src/os_win/os_priv.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_win/os_setvbuf.c b/src/os_win/os_setvbuf.c index b38ab1ebee2..78e42ecf4b5 100644 --- a/src/os_win/os_setvbuf.c +++ b/src/os_win/os_setvbuf.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/os_win/os_sleep.c b/src/os_win/os_sleep.c index 1cb61f7c4aa..4b6bdaea0be 100644 --- a/src/os_win/os_sleep.c +++ b/src/os_win/os_sleep.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_win/os_snprintf.c b/src/os_win/os_snprintf.c index f3025b12a60..20231b468c6 100644 --- a/src/os_win/os_snprintf.c +++ b/src/os_win/os_snprintf.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_win/os_thread.c b/src/os_win/os_thread.c index 4c8f212bb4f..e1b30b770cd 100644 --- a/src/os_win/os_thread.c +++ b/src/os_win/os_thread.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_win/os_time.c b/src/os_win/os_time.c index 6aa5b3719f6..b06ab69c8e4 100644 --- a/src/os_win/os_time.c +++ b/src/os_win/os_time.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_win/os_utf8.c b/src/os_win/os_utf8.c index ccd8321aecf..f7bab41c81f 100644 --- a/src/os_win/os_utf8.c +++ b/src/os_win/os_utf8.c @@ -1,7 +1,7 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. + * All rights reserved. * * See the file LICENSE for redistribution information. */ diff --git a/src/os_win/os_winerr.c b/src/os_win/os_winerr.c index 70499580c48..c7748d80fb2 100644 --- a/src/os_win/os_winerr.c +++ b/src/os_win/os_winerr.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. 
* Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/os_win/os_yield.c b/src/os_win/os_yield.c index 038f2efe162..e38fc21e16b 100644 --- a/src/os_win/os_yield.c +++ b/src/os_win/os_yield.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/packing/pack_api.c b/src/packing/pack_api.c index 4c65406cd64..ee7ce6c4c0d 100644 --- a/src/packing/pack_api.c +++ b/src/packing/pack_api.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/packing/pack_impl.c b/src/packing/pack_impl.c index 5dbb0f33842..d40043fc13c 100644 --- a/src/packing/pack_impl.c +++ b/src/packing/pack_impl.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/packing/pack_stream.c b/src/packing/pack_stream.c index 1393eb9a9c1..dc2925acaf3 100644 --- a/src/packing/pack_stream.c +++ b/src/packing/pack_stream.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/reconcile/rec_track.c b/src/reconcile/rec_track.c index 5bf425b1b21..a431465661f 100644 --- a/src/reconcile/rec_track.c +++ b/src/reconcile/rec_track.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 7080e308007..47194478887 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. 
+ * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/schema/schema_alter.c b/src/schema/schema_alter.c index b864c5d8b05..346f09a1a64 100644 --- a/src/schema/schema_alter.c +++ b/src/schema/schema_alter.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/schema/schema_create.c b/src/schema/schema_create.c index 0677fa711a5..1ba0961cced 100644 --- a/src/schema/schema_create.c +++ b/src/schema/schema_create.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/schema/schema_drop.c b/src/schema/schema_drop.c index 55f92a64eee..ec12ec3752f 100644 --- a/src/schema/schema_drop.c +++ b/src/schema/schema_drop.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/schema/schema_list.c b/src/schema/schema_list.c index 6060cfb6aca..bbdc3568af3 100644 --- a/src/schema/schema_list.c +++ b/src/schema/schema_list.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/schema/schema_open.c b/src/schema/schema_open.c index c9f2baa81b8..d765882a3b6 100644 --- a/src/schema/schema_open.c +++ b/src/schema/schema_open.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/schema/schema_plan.c b/src/schema/schema_plan.c index 475902be887..cef8260d265 100644 --- a/src/schema/schema_plan.c +++ b/src/schema/schema_plan.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/schema/schema_project.c b/src/schema/schema_project.c index fd59539ae89..9ea8afc8580 100644 --- a/src/schema/schema_project.c +++ b/src/schema/schema_project.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/schema/schema_rename.c b/src/schema/schema_rename.c index 14b5dc7afe7..1868d907d00 100644 --- a/src/schema/schema_rename.c +++ b/src/schema/schema_rename.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/schema/schema_stat.c b/src/schema/schema_stat.c index 345f9164e9b..d2d61febc39 100644 --- a/src/schema/schema_stat.c +++ b/src/schema/schema_stat.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/schema/schema_truncate.c b/src/schema/schema_truncate.c index 563bafa8ffc..05c6f32551a 100644 --- a/src/schema/schema_truncate.c +++ b/src/schema/schema_truncate.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/schema/schema_util.c b/src/schema/schema_util.c index 9de4b916a79..da58d4d7104 100644 --- a/src/schema/schema_util.c +++ b/src/schema/schema_util.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. 
* Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/schema/schema_worker.c b/src/schema/schema_worker.c index 62cdd7d367b..7655456b243 100644 --- a/src/schema/schema_worker.c +++ b/src/schema/schema_worker.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/session/session_api.c b/src/session/session_api.c index 5429d95a11b..73979947f49 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/session/session_compact.c b/src/session/session_compact.c index efbf18bcb14..a22ad6f22ef 100644 --- a/src/session/session_compact.c +++ b/src/session/session_compact.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c index 95fb6a6f90e..3cfbfcead36 100644 --- a/src/session/session_dhandle.c +++ b/src/session/session_dhandle.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/session/session_salvage.c b/src/session/session_salvage.c index 12ce71cdbb0..5a67bd1f7ac 100644 --- a/src/session/session_salvage.c +++ b/src/session/session_salvage.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/support/cond_auto.c b/src/support/cond_auto.c index 600e5eab0ff..2d43eb3bf79 100644 --- a/src/support/cond_auto.c +++ b/src/support/cond_auto.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/support/crypto.c b/src/support/crypto.c index cce0d228832..6208d83b0f2 100644 --- a/src/support/crypto.c +++ b/src/support/crypto.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/support/err.c b/src/support/err.c index 57efde72b23..a91f994b445 100644 --- a/src/support/err.c +++ b/src/support/err.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/support/generation.c b/src/support/generation.c index 3081a3fa0f1..ed615d4c7cd 100644 --- a/src/support/generation.c +++ b/src/support/generation.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/support/global.c b/src/support/global.c index aa69e0db9d6..6525fe21809 100644 --- a/src/support/global.c +++ b/src/support/global.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/support/hash_city.c b/src/support/hash_city.c index 8354532e820..e14368d3529 100644 --- a/src/support/hash_city.c +++ b/src/support/hash_city.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. 
* * This is free and unencumbered software released into the public domain. diff --git a/src/support/hash_fnv.c b/src/support/hash_fnv.c index 83dd2574099..aad698229fd 100644 --- a/src/support/hash_fnv.c +++ b/src/support/hash_fnv.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/src/support/hazard.c b/src/support/hazard.c index 5f93ce45ba1..6a1b7149a91 100644 --- a/src/support/hazard.c +++ b/src/support/hazard.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/support/hex.c b/src/support/hex.c index b54a08dd8f3..e0b1b6de1ea 100644 --- a/src/support/hex.c +++ b/src/support/hex.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/support/huffman.c b/src/support/huffman.c index afc785b39a9..83d1e790ce7 100644 --- a/src/support/huffman.c +++ b/src/support/huffman.c @@ -1,5 +1,5 @@ -/* - * Copyright (c) 2014-2016 MongoDB, Inc. +/*- + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/support/mtx_rw.c b/src/support/mtx_rw.c index 35ad5da23f2..0126e77e9b8 100644 --- a/src/support/mtx_rw.c +++ b/src/support/mtx_rw.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/src/support/pow.c b/src/support/pow.c index 028263581d3..cd770a514b2 100644 --- a/src/support/pow.c +++ b/src/support/pow.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. 
+ * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/src/support/rand.c b/src/support/rand.c index 4fae43edc8e..8083b8801c1 100644 --- a/src/support/rand.c +++ b/src/support/rand.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/src/support/scratch.c b/src/support/scratch.c index 485cea90e89..c0e4cfe6ab7 100644 --- a/src/support/scratch.c +++ b/src/support/scratch.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/support/thread_group.c b/src/support/thread_group.c index 38e3049c264..09ba10097ca 100644 --- a/src/support/thread_group.c +++ b/src/support/thread_group.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/txn/txn.c b/src/txn/txn.c index c48053c82af..55d2e3c2900 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index cca5cdcb52d..50684f1a75a 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/txn/txn_ext.c b/src/txn/txn_ext.c index 9ea1af6c4f8..625f970cca8 100644 --- a/src/txn/txn_ext.c +++ b/src/txn/txn_ext.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. 
+ * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c index 2931dc1ce82..4d5f1df6a88 100644 --- a/src/txn/txn_log.c +++ b/src/txn/txn_log.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/txn/txn_nsnap.c b/src/txn/txn_nsnap.c index 659570dbcd9..601d9492566 100644 --- a/src/txn/txn_nsnap.c +++ b/src/txn/txn_nsnap.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c index 30932195b1e..e5d1aed7083 100644 --- a/src/txn/txn_recover.c +++ b/src/txn/txn_recover.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util.h b/src/utilities/util.h index 93a96d44219..0238915df07 100644 --- a/src/utilities/util.h +++ b/src/utilities/util.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_alter.c b/src/utilities/util_alter.c index ef01a1ed826..da6316b2364 100644 --- a/src/utilities/util_alter.c +++ b/src/utilities/util_alter.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_backup.c b/src/utilities/util_backup.c index f1b31f7621a..7d809c2a624 100644 --- a/src/utilities/util_backup.c +++ b/src/utilities/util_backup.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. 
+ * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_compact.c b/src/utilities/util_compact.c index e469b4dce6e..c8963a8fda6 100644 --- a/src/utilities/util_compact.c +++ b/src/utilities/util_compact.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_cpyright.c b/src/utilities/util_cpyright.c index 7de0eab6dc6..0cfba056387 100644 --- a/src/utilities/util_cpyright.c +++ b/src/utilities/util_cpyright.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * @@ -11,7 +11,7 @@ void util_copyright(void) { - printf("%s\n", "Copyright (c) 2008-2016 MongoDB, Inc."); + printf("%s\n", "Copyright (c) 2008-2017 MongoDB, Inc."); printf("%s\n\n", "All rights reserved."); printf("%s\n\n", diff --git a/src/utilities/util_create.c b/src/utilities/util_create.c index 7c22a67792b..2c7a87fd406 100644 --- a/src/utilities/util_create.c +++ b/src/utilities/util_create.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_drop.c b/src/utilities/util_drop.c index 456005d445d..460c9a6de57 100644 --- a/src/utilities/util_drop.c +++ b/src/utilities/util_drop.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c index 955148b7d46..15200d70f7e 100644 --- a/src/utilities/util_dump.c +++ b/src/utilities/util_dump.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. 
+ * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_dump.h b/src/utilities/util_dump.h index e3fd8e6a501..7f037cc659a 100644 --- a/src/utilities/util_dump.h +++ b/src/utilities/util_dump.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_list.c b/src/utilities/util_list.c index f19ba4d1f97..72888e03183 100644 --- a/src/utilities/util_list.c +++ b/src/utilities/util_list.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_load.c b/src/utilities/util_load.c index d2f00402217..7f5c9529f2c 100644 --- a/src/utilities/util_load.c +++ b/src/utilities/util_load.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_load.h b/src/utilities/util_load.h index 710b18bfe83..53fce665ddc 100644 --- a/src/utilities/util_load.h +++ b/src/utilities/util_load.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_load_json.c b/src/utilities/util_load_json.c index c693e2b7651..8bc643f8556 100644 --- a/src/utilities/util_load_json.c +++ b/src/utilities/util_load_json.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/utilities/util_loadtext.c b/src/utilities/util_loadtext.c index 7602d43f8c9..1053ab89694 100644 --- a/src/utilities/util_loadtext.c +++ b/src/utilities/util_loadtext.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_main.c b/src/utilities/util_main.c index c6f225bb667..010af63ea30 100644 --- a/src/utilities/util_main.c +++ b/src/utilities/util_main.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_misc.c b/src/utilities/util_misc.c index e26185a0096..d0fe35ff370 100644 --- a/src/utilities/util_misc.c +++ b/src/utilities/util_misc.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_printlog.c b/src/utilities/util_printlog.c index 5f3ed43905b..2e5ae3aa926 100644 --- a/src/utilities/util_printlog.c +++ b/src/utilities/util_printlog.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_read.c b/src/utilities/util_read.c index 393949b6a1c..a27e8454780 100644 --- a/src/utilities/util_read.c +++ b/src/utilities/util_read.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_rebalance.c b/src/utilities/util_rebalance.c index c188ea17d22..f58f086e777 100644 --- a/src/utilities/util_rebalance.c +++ b/src/utilities/util_rebalance.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. 
+ * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_rename.c b/src/utilities/util_rename.c index bb2d40cd103..51e936100ff 100644 --- a/src/utilities/util_rename.c +++ b/src/utilities/util_rename.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_salvage.c b/src/utilities/util_salvage.c index 6cc2278b846..dc311b5ee9a 100644 --- a/src/utilities/util_salvage.c +++ b/src/utilities/util_salvage.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_stat.c b/src/utilities/util_stat.c index 0692afe2819..baabaaeff01 100644 --- a/src/utilities/util_stat.c +++ b/src/utilities/util_stat.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_truncate.c b/src/utilities/util_truncate.c index 35de02345c8..101fa23c0cb 100644 --- a/src/utilities/util_truncate.c +++ b/src/utilities/util_truncate.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_upgrade.c b/src/utilities/util_upgrade.c index f89bd46e133..1ad95bf3cf2 100644 --- a/src/utilities/util_upgrade.c +++ b/src/utilities/util_upgrade.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. 
* diff --git a/src/utilities/util_verbose.c b/src/utilities/util_verbose.c index e568ec0a414..979cd7451c1 100644 --- a/src/utilities/util_verbose.c +++ b/src/utilities/util_verbose.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_verify.c b/src/utilities/util_verify.c index ace1be7a5de..f0c51596ca4 100644 --- a/src/utilities/util_verify.c +++ b/src/utilities/util_verify.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/src/utilities/util_write.c b/src/utilities/util_write.c index 1d3e6937f8d..ca4203e1e51 100644 --- a/src/utilities/util_write.c +++ b/src/utilities/util_write.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2014-2017 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * diff --git a/test/bloom/test_bloom.c b/test/bloom/test_bloom.c index b6299bbbadc..e12a0ed1550 100644 --- a/test/bloom/test_bloom.c +++ b/test/bloom/test_bloom.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/checkpoint/checkpointer.c b/test/checkpoint/checkpointer.c index 84d2765843a..634a8db9124 100644 --- a/test/checkpoint/checkpointer.c +++ b/test/checkpoint/checkpointer.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/test/checkpoint/test_checkpoint.c b/test/checkpoint/test_checkpoint.c index e7e1a0b81a5..ca13c2bc4ec 100644 --- a/test/checkpoint/test_checkpoint.c +++ b/test/checkpoint/test_checkpoint.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/checkpoint/test_checkpoint.h b/test/checkpoint/test_checkpoint.h index 347bd2c6e89..223b580c611 100644 --- a/test/checkpoint/test_checkpoint.h +++ b/test/checkpoint/test_checkpoint.h @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/checkpoint/workers.c b/test/checkpoint/workers.c index 82d1b8685c4..a055654df71 100644 --- a/test/checkpoint/workers.c +++ b/test/checkpoint/workers.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/csuite/scope/main.c b/test/csuite/scope/main.c index 15dabd97c40..d2d902f33f9 100644 --- a/test/csuite/scope/main.c +++ b/test/csuite/scope/main.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/csuite/wt1965_col_efficiency/main.c b/test/csuite/wt1965_col_efficiency/main.c index e5b73d5e642..e6801d8d37e 100644 --- a/test/csuite/wt1965_col_efficiency/main.c +++ b/test/csuite/wt1965_col_efficiency/main.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. 
* Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/csuite/wt2246_col_append/main.c b/test/csuite/wt2246_col_append/main.c index 9876582fffa..de7916b6859 100644 --- a/test/csuite/wt2246_col_append/main.c +++ b/test/csuite/wt2246_col_append/main.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/csuite/wt2323_join_visibility/main.c b/test/csuite/wt2323_join_visibility/main.c index 617490fec4d..a3ab8c153ed 100644 --- a/test/csuite/wt2323_join_visibility/main.c +++ b/test/csuite/wt2323_join_visibility/main.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/csuite/wt2403_lsm_workload/main.c b/test/csuite/wt2403_lsm_workload/main.c index 0c287484b9e..214276bda21 100644 --- a/test/csuite/wt2403_lsm_workload/main.c +++ b/test/csuite/wt2403_lsm_workload/main.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/csuite/wt2447_join_main_table/main.c b/test/csuite/wt2447_join_main_table/main.c index 656cea04145..8ad721c8d51 100644 --- a/test/csuite/wt2447_join_main_table/main.c +++ b/test/csuite/wt2447_join_main_table/main.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/test/csuite/wt2535_insert_race/main.c b/test/csuite/wt2535_insert_race/main.c index ba17d485e07..6ea599fc118 100644 --- a/test/csuite/wt2535_insert_race/main.c +++ b/test/csuite/wt2535_insert_race/main.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/csuite/wt2592_join_schema/main.c b/test/csuite/wt2592_join_schema/main.c index be3eff6136c..04fe954c427 100644 --- a/test/csuite/wt2592_join_schema/main.c +++ b/test/csuite/wt2592_join_schema/main.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/csuite/wt2695_checksum/main.c b/test/csuite/wt2695_checksum/main.c index db4fed5dc53..3bd9bfca3c0 100644 --- a/test/csuite/wt2695_checksum/main.c +++ b/test/csuite/wt2695_checksum/main.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/csuite/wt2719_reconfig/main.c b/test/csuite/wt2719_reconfig/main.c index 0942cfc73b2..cef95490c7e 100644 --- a/test/csuite/wt2719_reconfig/main.c +++ b/test/csuite/wt2719_reconfig/main.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/test/csuite/wt2834_join_bloom_fix/main.c b/test/csuite/wt2834_join_bloom_fix/main.c index e128df29f41..74128406a8e 100644 --- a/test/csuite/wt2834_join_bloom_fix/main.c +++ b/test/csuite/wt2834_join_bloom_fix/main.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/csuite/wt2853_perf/main.c b/test/csuite/wt2853_perf/main.c index 46ba71372e5..096bc64cf82 100644 --- a/test/csuite/wt2853_perf/main.c +++ b/test/csuite/wt2853_perf/main.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/csuite/wt2909_checkpoint_integrity/main.c b/test/csuite/wt2909_checkpoint_integrity/main.c index ce7bd72fa3f..47837356401 100644 --- a/test/csuite/wt2909_checkpoint_integrity/main.c +++ b/test/csuite/wt2909_checkpoint_integrity/main.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/csuite/wt2999_join_extractor/main.c b/test/csuite/wt2999_join_extractor/main.c index 646a7077af1..194ff143610 100644 --- a/test/csuite/wt2999_join_extractor/main.c +++ b/test/csuite/wt2999_join_extractor/main.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/test/csuite/wt3135_search_near_collator/main.c b/test/csuite/wt3135_search_near_collator/main.c index 8783034a7d8..103a502f808 100644 --- a/test/csuite/wt3135_search_near_collator/main.c +++ b/test/csuite/wt3135_search_near_collator/main.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/csuite/wt3184_dup_index_collator/main.c b/test/csuite/wt3184_dup_index_collator/main.c index c969e7a1d7e..cd166780c6b 100644 --- a/test/csuite/wt3184_dup_index_collator/main.c +++ b/test/csuite/wt3184_dup_index_collator/main.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/cursor_order/cursor_order.c b/test/cursor_order/cursor_order.c index d3c64b54ab5..2cbca9baf0e 100644 --- a/test/cursor_order/cursor_order.c +++ b/test/cursor_order/cursor_order.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/cursor_order/cursor_order.h b/test/cursor_order/cursor_order.h index 98a7d03c6f3..4f9240f77e8 100644 --- a/test/cursor_order/cursor_order.h +++ b/test/cursor_order/cursor_order.h @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/test/cursor_order/cursor_order_file.c b/test/cursor_order/cursor_order_file.c index 42d7af54de4..c1f69a65c88 100644 --- a/test/cursor_order/cursor_order_file.c +++ b/test/cursor_order/cursor_order_file.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/cursor_order/cursor_order_ops.c b/test/cursor_order/cursor_order_ops.c index 299f22684c9..5c6cfe363b6 100644 --- a/test/cursor_order/cursor_order_ops.c +++ b/test/cursor_order/cursor_order_ops.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/fops/file.c b/test/fops/file.c index d1cd22ab391..1bb13f8a4de 100644 --- a/test/fops/file.c +++ b/test/fops/file.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/fops/fops.c b/test/fops/fops.c index 3c4de161423..571b7dd59fa 100644 --- a/test/fops/fops.c +++ b/test/fops/fops.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/fops/t.c b/test/fops/t.c index 07ac07349e3..a481c9ff1c4 100644 --- a/test/fops/t.c +++ b/test/fops/t.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/test/fops/thread.h b/test/fops/thread.h index 89b7984a166..9c1fb0150a6 100644 --- a/test/fops/thread.h +++ b/test/fops/thread.h @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/format/backup.c b/test/format/backup.c index 8aa614fa970..ce8b8fed6bd 100644 --- a/test/format/backup.c +++ b/test/format/backup.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/format/bdb.c b/test/format/bdb.c index 8b61573fdf9..6ee3e063cad 100644 --- a/test/format/bdb.c +++ b/test/format/bdb.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/format/bulk.c b/test/format/bulk.c index dab23bed404..0e7c54516e6 100644 --- a/test/format/bulk.c +++ b/test/format/bulk.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/format/compact.c b/test/format/compact.c index 240e5553697..00aed4c10f0 100644 --- a/test/format/compact.c +++ b/test/format/compact.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/test/format/config.c b/test/format/config.c index 22b40f7164d..ce1dc6d6e8e 100644 --- a/test/format/config.c +++ b/test/format/config.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/format/config.h b/test/format/config.h index b5feb7a5321..bc809a764ce 100644 --- a/test/format/config.h +++ b/test/format/config.h @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/format/format.h b/test/format/format.h index 41cc48c4278..fa898e439be 100644 --- a/test/format/format.h +++ b/test/format/format.h @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/format/lrt.c b/test/format/lrt.c index 69d6b22d71f..4af9d66d0e1 100644 --- a/test/format/lrt.c +++ b/test/format/lrt.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/format/ops.c b/test/format/ops.c index 72e885bd0d6..c3472fd28c3 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/test/format/rebalance.c b/test/format/rebalance.c index e35c62e7255..195130cfa68 100644 --- a/test/format/rebalance.c +++ b/test/format/rebalance.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/format/salvage.c b/test/format/salvage.c index f82dc34dd5f..a7ac01eff15 100644 --- a/test/format/salvage.c +++ b/test/format/salvage.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/format/t.c b/test/format/t.c index c6686ae8b91..0cfe4e40421 100644 --- a/test/format/t.c +++ b/test/format/t.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/format/util.c b/test/format/util.c index 983d03e2525..06e3f37b830 100644 --- a/test/format/util.c +++ b/test/format/util.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/format/wts.c b/test/format/wts.c index 6aa4784d1c1..c5346bd399e 100644 --- a/test/format/wts.c +++ b/test/format/wts.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/test/huge/huge.c b/test/huge/huge.c index 2b0d5f498e3..a1fd45711a2 100644 --- a/test/huge/huge.c +++ b/test/huge/huge.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/java/com/wiredtiger/test/AsyncTest.java b/test/java/com/wiredtiger/test/AsyncTest.java index fc28e669313..11d98fb3b4e 100644 --- a/test/java/com/wiredtiger/test/AsyncTest.java +++ b/test/java/com/wiredtiger/test/AsyncTest.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/java/com/wiredtiger/test/AutoCloseTest.java b/test/java/com/wiredtiger/test/AutoCloseTest.java index d7304bb8a44..e4f720ede8b 100644 --- a/test/java/com/wiredtiger/test/AutoCloseTest.java +++ b/test/java/com/wiredtiger/test/AutoCloseTest.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/java/com/wiredtiger/test/BackupCursorTest.java b/test/java/com/wiredtiger/test/BackupCursorTest.java index dd25e4df7d6..af0a2784589 100644 --- a/test/java/com/wiredtiger/test/BackupCursorTest.java +++ b/test/java/com/wiredtiger/test/BackupCursorTest.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/test/java/com/wiredtiger/test/ConcurrentCloseTest.java b/test/java/com/wiredtiger/test/ConcurrentCloseTest.java index fead0b0bf38..3759057ef1f 100644 --- a/test/java/com/wiredtiger/test/ConcurrentCloseTest.java +++ b/test/java/com/wiredtiger/test/ConcurrentCloseTest.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/java/com/wiredtiger/test/ConfigTest.java b/test/java/com/wiredtiger/test/ConfigTest.java index 2afde7df2dc..432aa245afa 100644 --- a/test/java/com/wiredtiger/test/ConfigTest.java +++ b/test/java/com/wiredtiger/test/ConfigTest.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/java/com/wiredtiger/test/CursorTest.java b/test/java/com/wiredtiger/test/CursorTest.java index 4cd244e5b10..28c92dd8a8d 100644 --- a/test/java/com/wiredtiger/test/CursorTest.java +++ b/test/java/com/wiredtiger/test/CursorTest.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/java/com/wiredtiger/test/CursorTest02.java b/test/java/com/wiredtiger/test/CursorTest02.java index f107bf0b8f2..10705997352 100644 --- a/test/java/com/wiredtiger/test/CursorTest02.java +++ b/test/java/com/wiredtiger/test/CursorTest02.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/test/java/com/wiredtiger/test/CursorTest03.java b/test/java/com/wiredtiger/test/CursorTest03.java index 64f33f4d7b6..73c7a22f69d 100644 --- a/test/java/com/wiredtiger/test/CursorTest03.java +++ b/test/java/com/wiredtiger/test/CursorTest03.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/java/com/wiredtiger/test/ExceptionTest.java b/test/java/com/wiredtiger/test/ExceptionTest.java index 0c71ea4371b..99719225b05 100644 --- a/test/java/com/wiredtiger/test/ExceptionTest.java +++ b/test/java/com/wiredtiger/test/ExceptionTest.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/java/com/wiredtiger/test/PackTest.java b/test/java/com/wiredtiger/test/PackTest.java index f24ca6e2def..a162fdfd0f0 100644 --- a/test/java/com/wiredtiger/test/PackTest.java +++ b/test/java/com/wiredtiger/test/PackTest.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/java/com/wiredtiger/test/PackTest02.java b/test/java/com/wiredtiger/test/PackTest02.java index 847e3c4ab08..517afd1ec03 100644 --- a/test/java/com/wiredtiger/test/PackTest02.java +++ b/test/java/com/wiredtiger/test/PackTest02.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/test/java/com/wiredtiger/test/PackTest03.java b/test/java/com/wiredtiger/test/PackTest03.java index c3ae854dcaf..81e7987f987 100644 --- a/test/java/com/wiredtiger/test/PackTest03.java +++ b/test/java/com/wiredtiger/test/PackTest03.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/java/com/wiredtiger/test/WiredTigerSuite.java b/test/java/com/wiredtiger/test/WiredTigerSuite.java index 9322d30671a..c77ff5b3507 100644 --- a/test/java/com/wiredtiger/test/WiredTigerSuite.java +++ b/test/java/com/wiredtiger/test/WiredTigerSuite.java @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/manydbs/manydbs.c b/test/manydbs/manydbs.c index 42020d6ce9a..72ad6228006 100644 --- a/test/manydbs/manydbs.c +++ b/test/manydbs/manydbs.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/packing/intpack-test.c b/test/packing/intpack-test.c index c84823b741b..e7822015091 100644 --- a/test/packing/intpack-test.c +++ b/test/packing/intpack-test.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/packing/intpack-test2.c b/test/packing/intpack-test2.c index 4e612808a35..e216899cebb 100644 --- a/test/packing/intpack-test2.c +++ b/test/packing/intpack-test2.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. 
+ * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/packing/intpack-test3.c b/test/packing/intpack-test3.c index 763b0255ecf..00fc80e24a2 100644 --- a/test/packing/intpack-test3.c +++ b/test/packing/intpack-test3.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/packing/packing-test.c b/test/packing/packing-test.c index 919b0622806..bd48ac7125c 100644 --- a/test/packing/packing-test.c +++ b/test/packing/packing-test.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/readonly/readonly.c b/test/readonly/readonly.c index 66c7a0ca692..6f1c34a1fc6 100644 --- a/test/readonly/readonly.c +++ b/test/readonly/readonly.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c index febe6530534..1a759590871 100644 --- a/test/recovery/random-abort.c +++ b/test/recovery/random-abort.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/test/recovery/truncated-log.c b/test/recovery/truncated-log.c index 89e282bbb96..c9d73e0cf48 100644 --- a/test/recovery/truncated-log.c +++ b/test/recovery/truncated-log.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/salvage/salvage.c b/test/salvage/salvage.c index 83f9c6349bc..2a99d8c4851 100644 --- a/test/salvage/salvage.c +++ b/test/salvage/salvage.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/suite/helper.py b/test/suite/helper.py index d1f41f05e8b..2f9bbf8aa68 100644 --- a/test/suite/helper.py +++ b/test/suite/helper.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/run.py b/test/suite/run.py index 97c58bfdccf..8a936de584b 100644 --- a/test/suite/run.py +++ b/test/suite/run.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/suite_random.py b/test/suite/suite_random.py index fd580cec43b..16a8b89113c 100644 --- a/test/suite/suite_random.py +++ b/test/suite/suite_random.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/suite_subprocess.py b/test/suite/suite_subprocess.py index c56c8d8e933..86134db5f88 100644 --- a/test/suite/suite_subprocess.py +++ b/test/suite/suite_subprocess.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_alter01.py b/test/suite/test_alter01.py index dfdf6b7a17e..7a143afb32c 100644 --- a/test/suite/test_alter01.py +++ b/test/suite/test_alter01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_async01.py b/test/suite/test_async01.py index 158c16a9381..4faaad6b8f4 100644 --- a/test/suite/test_async01.py +++ b/test/suite/test_async01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_async02.py b/test/suite/test_async02.py index 28435fe85b2..fbd743fec29 100644 --- a/test/suite/test_async02.py +++ b/test/suite/test_async02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_async03.py b/test/suite/test_async03.py index 4859360924a..cf993071d73 100644 --- a/test/suite/test_async03.py +++ b/test/suite/test_async03.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. 
# Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_autoclose.py b/test/suite/test_autoclose.py index c5633d5a21e..ce152b24fe3 100644 --- a/test/suite/test_autoclose.py +++ b/test/suite/test_autoclose.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_backup01.py b/test/suite/test_backup01.py index 4e98b6d8e77..52d71ab53bb 100644 --- a/test/suite/test_backup01.py +++ b/test/suite/test_backup01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_backup02.py b/test/suite/test_backup02.py index d4089273be0..7d8f653feae 100644 --- a/test/suite/test_backup02.py +++ b/test/suite/test_backup02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_backup03.py b/test/suite/test_backup03.py index c1ed3cc9e1a..7d0bfd5eaaf 100644 --- a/test/suite/test_backup03.py +++ b/test/suite/test_backup03.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/test_backup04.py b/test/suite/test_backup04.py index be52a5e1e97..9f40ae2427b 100644 --- a/test/suite/test_backup04.py +++ b/test/suite/test_backup04.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_backup05.py b/test/suite/test_backup05.py index 4ecb782a0d5..fb44de04694 100644 --- a/test/suite/test_backup05.py +++ b/test/suite/test_backup05.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_backup06.py b/test/suite/test_backup06.py index 9f7a247f2b9..d416ba035b5 100644 --- a/test/suite/test_backup06.py +++ b/test/suite/test_backup06.py @@ -1,8 +1,8 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. - +# # This is free and unencumbered software released into the public domain. # # Anyone is free to copy, modify, publish, use, compile, sell, or diff --git a/test/suite/test_base01.py b/test/suite/test_base01.py index 2a5f96cbae2..f39ec3eb739 100644 --- a/test/suite/test_base01.py +++ b/test/suite/test_base01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_base02.py b/test/suite/test_base02.py index 2b51fe1b530..5e1140a5700 100644 --- a/test/suite/test_base02.py +++ b/test/suite/test_base02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. 
+# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_base03.py b/test/suite/test_base03.py index fe6fa53c288..ad1629db77e 100644 --- a/test/suite/test_base03.py +++ b/test/suite/test_base03.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_base04.py b/test/suite/test_base04.py index 973ee1327a5..f9fdddce157 100644 --- a/test/suite/test_base04.py +++ b/test/suite/test_base04.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_base05.py b/test/suite/test_base05.py index 4bee0efcfe2..5ba6d5eda4b 100644 --- a/test/suite/test_base05.py +++ b/test/suite/test_base05.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_baseconfig.py b/test/suite/test_baseconfig.py index 89b3b29544e..3a5778b3bb5 100644 --- a/test/suite/test_baseconfig.py +++ b/test/suite/test_baseconfig.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/test_bug001.py b/test/suite/test_bug001.py index 4c4a722285c..4353dad5e68 100644 --- a/test/suite/test_bug001.py +++ b/test/suite/test_bug001.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_bug003.py b/test/suite/test_bug003.py index 73d9cd13ab9..799c004e17d 100644 --- a/test/suite/test_bug003.py +++ b/test/suite/test_bug003.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_bug004.py b/test/suite/test_bug004.py index 464cc57e272..a47bdc6dd1e 100644 --- a/test/suite/test_bug004.py +++ b/test/suite/test_bug004.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_bug005.py b/test/suite/test_bug005.py index 69df175ae67..6d099bf2708 100644 --- a/test/suite/test_bug005.py +++ b/test/suite/test_bug005.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_bug006.py b/test/suite/test_bug006.py index c0f6055f720..505325de200 100644 --- a/test/suite/test_bug006.py +++ b/test/suite/test_bug006.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. 
# # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_bug007.py b/test/suite/test_bug007.py index 16cb5da903c..806d75d8394 100644 --- a/test/suite/test_bug007.py +++ b/test/suite/test_bug007.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_bug008.py b/test/suite/test_bug008.py index c54c92fc864..cb0bb390ad4 100644 --- a/test/suite/test_bug008.py +++ b/test/suite/test_bug008.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_bug009.py b/test/suite/test_bug009.py index 2bdfb7dec52..7f2af55f2d0 100644 --- a/test/suite/test_bug009.py +++ b/test/suite/test_bug009.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_bug010.py b/test/suite/test_bug010.py index 89f21e1da04..dfe317bf94e 100644 --- a/test/suite/test_bug010.py +++ b/test/suite/test_bug010.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_bug011.py b/test/suite/test_bug011.py index 5e0721b93f1..2c3fd831f93 100644 --- a/test/suite/test_bug011.py +++ b/test/suite/test_bug011.py @@ -1,6 +1,6 @@ -#!usr/bin/env python +#!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. 
+# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_bug012.py b/test/suite/test_bug012.py index 91f49d14b3f..ae80a9c7179 100644 --- a/test/suite/test_bug012.py +++ b/test/suite/test_bug012.py @@ -1,6 +1,6 @@ -#!usr/bin/env python +#!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_bug013.py b/test/suite/test_bug013.py index a42809aea5f..a15bd42c9da 100644 --- a/test/suite/test_bug013.py +++ b/test/suite/test_bug013.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_bug014.py b/test/suite/test_bug014.py index 1dee933e839..81e47bc331b 100644 --- a/test/suite/test_bug014.py +++ b/test/suite/test_bug014.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_bug015.py b/test/suite/test_bug015.py index 68cca49688f..5b2a64dc76e 100644 --- a/test/suite/test_bug015.py +++ b/test/suite/test_bug015.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/test_bug016.py b/test/suite/test_bug016.py index 4b8867e1e93..a2a40118008 100644 --- a/test/suite/test_bug016.py +++ b/test/suite/test_bug016.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_bug017.py b/test/suite/test_bug017.py index 03e7b2ba714..43aeee07bb6 100644 --- a/test/suite/test_bug017.py +++ b/test/suite/test_bug017.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_bulk01.py b/test/suite/test_bulk01.py index 8d5b6a04385..da399faba2d 100644 --- a/test/suite/test_bulk01.py +++ b/test/suite/test_bulk01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_bulk02.py b/test/suite/test_bulk02.py index fb9240e91e7..de9ebec5204 100644 --- a/test/suite/test_bulk02.py +++ b/test/suite/test_bulk02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_checkpoint01.py b/test/suite/test_checkpoint01.py index c0d004db78d..1964a94b31c 100644 --- a/test/suite/test_checkpoint01.py +++ b/test/suite/test_checkpoint01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. 
# # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_checkpoint02.py b/test/suite/test_checkpoint02.py index b5d20fb73b1..3a0a47d8163 100644 --- a/test/suite/test_checkpoint02.py +++ b/test/suite/test_checkpoint02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_colgap.py b/test/suite/test_colgap.py index 91df0fd6c1c..01e52ea1da5 100644 --- a/test/suite/test_colgap.py +++ b/test/suite/test_colgap.py @@ -1,6 +1,6 @@ -#!usr/bin/env python +#!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_collator.py b/test/suite/test_collator.py index 7ce135c8976..320c4e7d7b4 100644 --- a/test/suite/test_collator.py +++ b/test/suite/test_collator.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_compact01.py b/test/suite/test_compact01.py index 56ab6d39076..cfe5c909b1f 100644 --- a/test/suite/test_compact01.py +++ b/test/suite/test_compact01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/test_compact02.py b/test/suite/test_compact02.py index 803600eea14..eb1eb641191 100644 --- a/test/suite/test_compact02.py +++ b/test/suite/test_compact02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_compress01.py b/test/suite/test_compress01.py index ef1064d294e..1190a9dbe00 100644 --- a/test/suite/test_compress01.py +++ b/test/suite/test_compress01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_config01.py b/test/suite/test_config01.py index cbcb6835525..5252d805e07 100644 --- a/test/suite/test_config01.py +++ b/test/suite/test_config01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_config02.py b/test/suite/test_config02.py index 112a93ef2e0..441aa41d218 100644 --- a/test/suite/test_config02.py +++ b/test/suite/test_config02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_config03.py b/test/suite/test_config03.py index 89038d71319..810d399613a 100644 --- a/test/suite/test_config03.py +++ b/test/suite/test_config03.py @@ -1,6 +1,6 @@ -#!usr/bin/env python +#!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. 
+# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_config04.py b/test/suite/test_config04.py index db8a5f4a16a..b09189be8ea 100644 --- a/test/suite/test_config04.py +++ b/test/suite/test_config04.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_config05.py b/test/suite/test_config05.py index bee63d48da6..5960f01dc8e 100644 --- a/test/suite/test_config05.py +++ b/test/suite/test_config05.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_config06.py b/test/suite/test_config06.py index 55619e8774c..f39fe2d3a4f 100644 --- a/test/suite/test_config06.py +++ b/test/suite/test_config06.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_cursor01.py b/test/suite/test_cursor01.py index 8c66042eec0..41b017aa882 100644 --- a/test/suite/test_cursor01.py +++ b/test/suite/test_cursor01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/test_cursor02.py b/test/suite/test_cursor02.py index 0771a275cd2..35dc2587b1f 100644 --- a/test/suite/test_cursor02.py +++ b/test/suite/test_cursor02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_cursor03.py b/test/suite/test_cursor03.py index b4598483c12..8910dc741a4 100644 --- a/test/suite/test_cursor03.py +++ b/test/suite/test_cursor03.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_cursor04.py b/test/suite/test_cursor04.py index 8cbf922b5eb..b7457ec623d 100644 --- a/test/suite/test_cursor04.py +++ b/test/suite/test_cursor04.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_cursor05.py b/test/suite/test_cursor05.py index 4c276f06ff4..e0cce3dcb5e 100644 --- a/test/suite/test_cursor05.py +++ b/test/suite/test_cursor05.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_cursor06.py b/test/suite/test_cursor06.py index 117e29b0605..280d6f09171 100644 --- a/test/suite/test_cursor06.py +++ b/test/suite/test_cursor06.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. 
# Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_cursor07.py b/test/suite/test_cursor07.py index 19db718fd11..a31d0d401e0 100644 --- a/test/suite/test_cursor07.py +++ b/test/suite/test_cursor07.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_cursor08.py b/test/suite/test_cursor08.py index cc76f528aa9..82b4a3b7c7c 100644 --- a/test/suite/test_cursor08.py +++ b/test/suite/test_cursor08.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_cursor09.py b/test/suite/test_cursor09.py index 9a1fc06b617..de9ae5163b6 100644 --- a/test/suite/test_cursor09.py +++ b/test/suite/test_cursor09.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_cursor10.py b/test/suite/test_cursor10.py index 6cabfde9f1f..11fb43825ad 100644 --- a/test/suite/test_cursor10.py +++ b/test/suite/test_cursor10.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/test_cursor11.py b/test/suite/test_cursor11.py index e159ec499e6..1f3ea1555f2 100644 --- a/test/suite/test_cursor11.py +++ b/test/suite/test_cursor11.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_cursor_compare.py b/test/suite/test_cursor_compare.py index c0feb1d4867..7cf9ebfb0ca 100644 --- a/test/suite/test_cursor_compare.py +++ b/test/suite/test_cursor_compare.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_cursor_pin.py b/test/suite/test_cursor_pin.py index cb7045c7e41..91690ef6ed2 100644 --- a/test/suite/test_cursor_pin.py +++ b/test/suite/test_cursor_pin.py @@ -1,6 +1,6 @@ -#!usr/bin/env python +#!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_cursor_random.py b/test/suite/test_cursor_random.py index ee0f85a29ee..c7736e322e1 100644 --- a/test/suite/test_cursor_random.py +++ b/test/suite/test_cursor_random.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/test_cursor_random02.py b/test/suite/test_cursor_random02.py index d18d8efd94d..11ea8e1f489 100644 --- a/test/suite/test_cursor_random02.py +++ b/test/suite/test_cursor_random02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_cursor_tracker.py b/test/suite/test_cursor_tracker.py index a703e6cea70..dee3c6d1b45 100644 --- a/test/suite/test_cursor_tracker.py +++ b/test/suite/test_cursor_tracker.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_drop.py b/test/suite/test_drop.py index e241c05aa68..4be311b8bb2 100644 --- a/test/suite/test_drop.py +++ b/test/suite/test_drop.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_drop02.py b/test/suite/test_drop02.py index 017aa64e312..7ab891daf15 100644 --- a/test/suite/test_drop02.py +++ b/test/suite/test_drop02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_drop_create.py b/test/suite/test_drop_create.py index 654f054a583..eb851c3212f 100644 --- a/test/suite/test_drop_create.py +++ b/test/suite/test_drop_create.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. 
+# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_dump.py b/test/suite/test_dump.py index 3127c7aef00..37f4572b5c9 100644 --- a/test/suite/test_dump.py +++ b/test/suite/test_dump.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_dupc.py b/test/suite/test_dupc.py index c0cf6acc75e..6e35eb361a0 100644 --- a/test/suite/test_dupc.py +++ b/test/suite/test_dupc.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_durability01.py b/test/suite/test_durability01.py index 32cdd795914..97c89aabc4c 100644 --- a/test/suite/test_durability01.py +++ b/test/suite/test_durability01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_empty.py b/test/suite/test_empty.py index 578bec618c9..82a3bb406ee 100644 --- a/test/suite/test_empty.py +++ b/test/suite/test_empty.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/test_encrypt01.py b/test/suite/test_encrypt01.py index 317bed93246..5b4be01c861 100644 --- a/test/suite/test_encrypt01.py +++ b/test/suite/test_encrypt01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_encrypt02.py b/test/suite/test_encrypt02.py index d950be067e2..c62828cf607 100644 --- a/test/suite/test_encrypt02.py +++ b/test/suite/test_encrypt02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_encrypt03.py b/test/suite/test_encrypt03.py index 302572bd044..85be38a27ae 100644 --- a/test/suite/test_encrypt03.py +++ b/test/suite/test_encrypt03.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_encrypt04.py b/test/suite/test_encrypt04.py index 19c0b85d427..7bbc4c617f1 100644 --- a/test/suite/test_encrypt04.py +++ b/test/suite/test_encrypt04.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_encrypt05.py b/test/suite/test_encrypt05.py index d8862321821..d4653b2e9b6 100644 --- a/test/suite/test_encrypt05.py +++ b/test/suite/test_encrypt05.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. 
# Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_encrypt06.py b/test/suite/test_encrypt06.py index 72718e53b2b..62e32597f3d 100644 --- a/test/suite/test_encrypt06.py +++ b/test/suite/test_encrypt06.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_encrypt07.py b/test/suite/test_encrypt07.py index 81c9f1a49ea..4846a520b00 100644 --- a/test/suite/test_encrypt07.py +++ b/test/suite/test_encrypt07.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_env01.py b/test/suite/test_env01.py index 491ef9e8eac..c4ce7f69dd2 100644 --- a/test/suite/test_env01.py +++ b/test/suite/test_env01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_excl.py b/test/suite/test_excl.py index f8628d96ff7..539d599fe32 100644 --- a/test/suite/test_excl.py +++ b/test/suite/test_excl.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/test_hazard.py b/test/suite/test_hazard.py index f2891fce526..73c63099c85 100644 --- a/test/suite/test_hazard.py +++ b/test/suite/test_hazard.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_home.py b/test/suite/test_home.py index 48bf10d7618..667d466266b 100644 --- a/test/suite/test_home.py +++ b/test/suite/test_home.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_huffman01.py b/test/suite/test_huffman01.py index 8a880f7bae7..04a13210e40 100644 --- a/test/suite/test_huffman01.py +++ b/test/suite/test_huffman01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_huffman02.py b/test/suite/test_huffman02.py index d74704daf58..e009734ffb6 100644 --- a/test/suite/test_huffman02.py +++ b/test/suite/test_huffman02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_index01.py b/test/suite/test_index01.py index 5dfa5506277..bd3794bf730 100644 --- a/test/suite/test_index01.py +++ b/test/suite/test_index01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. 
# # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_index02.py b/test/suite/test_index02.py index 4f424e5d3d2..d2b7b66dfe3 100644 --- a/test/suite/test_index02.py +++ b/test/suite/test_index02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_inmem01.py b/test/suite/test_inmem01.py index 388485db29b..79a44d434d0 100644 --- a/test/suite/test_inmem01.py +++ b/test/suite/test_inmem01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_inmem02.py b/test/suite/test_inmem02.py index b5e07fea967..f2340f6af69 100644 --- a/test/suite/test_inmem02.py +++ b/test/suite/test_inmem02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_intpack.py b/test/suite/test_intpack.py index ae391e68fca..215ebc8856a 100644 --- a/test/suite/test_intpack.py +++ b/test/suite/test_intpack.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_join01.py b/test/suite/test_join01.py index bdd86a06d4f..167f4793ce4 100644 --- a/test/suite/test_join01.py +++ b/test/suite/test_join01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. 
+# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_join02.py b/test/suite/test_join02.py index db11ed01039..7b85791f17a 100644 --- a/test/suite/test_join02.py +++ b/test/suite/test_join02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_join03.py b/test/suite/test_join03.py index dd8111f6ead..552e27632d2 100644 --- a/test/suite/test_join03.py +++ b/test/suite/test_join03.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_join04.py b/test/suite/test_join04.py index e65b8b53333..c5ba1ad8c79 100644 --- a/test/suite/test_join04.py +++ b/test/suite/test_join04.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_join05.py b/test/suite/test_join05.py index 7dcb3e08911..aedf7a04c24 100644 --- a/test/suite/test_join05.py +++ b/test/suite/test_join05.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/test_join06.py b/test/suite/test_join06.py index a6681cdccd0..c3d2aa2b9ca 100644 --- a/test/suite/test_join06.py +++ b/test/suite/test_join06.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_join07.py b/test/suite/test_join07.py index 8fae3539246..87bcc8040d3 100644 --- a/test/suite/test_join07.py +++ b/test/suite/test_join07.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_join08.py b/test/suite/test_join08.py index d344653717b..cdcd89a207a 100644 --- a/test/suite/test_join08.py +++ b/test/suite/test_join08.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_join09.py b/test/suite/test_join09.py index d48353b1580..0441349803e 100644 --- a/test/suite/test_join09.py +++ b/test/suite/test_join09.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_jsondump01.py b/test/suite/test_jsondump01.py index c7fa9cdf397..13eb7e7be26 100644 --- a/test/suite/test_jsondump01.py +++ b/test/suite/test_jsondump01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. 
# # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_jsondump02.py b/test/suite/test_jsondump02.py index 60863c4aa97..5c6bf810e08 100644 --- a/test/suite/test_jsondump02.py +++ b/test/suite/test_jsondump02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_lsm01.py b/test/suite/test_lsm01.py index f705b09b0a4..8a9972261fd 100644 --- a/test/suite/test_lsm01.py +++ b/test/suite/test_lsm01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_lsm02.py b/test/suite/test_lsm02.py index e9628139a97..c35dfa43646 100644 --- a/test/suite/test_lsm02.py +++ b/test/suite/test_lsm02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_lsm03.py b/test/suite/test_lsm03.py index d916db415da..0eb02d546f0 100644 --- a/test/suite/test_lsm03.py +++ b/test/suite/test_lsm03.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/test_metadata_cursor01.py b/test/suite/test_metadata_cursor01.py index 284e26bc936..f9476a06642 100644 --- a/test/suite/test_metadata_cursor01.py +++ b/test/suite/test_metadata_cursor01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_nsnap01.py b/test/suite/test_nsnap01.py index 4d5555277fe..ee97e4f9985 100644 --- a/test/suite/test_nsnap01.py +++ b/test/suite/test_nsnap01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_nsnap02.py b/test/suite/test_nsnap02.py index ed1c96ebe50..689c704c97e 100644 --- a/test/suite/test_nsnap02.py +++ b/test/suite/test_nsnap02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_nsnap03.py b/test/suite/test_nsnap03.py index 6964fb914c3..7be6557d458 100644 --- a/test/suite/test_nsnap03.py +++ b/test/suite/test_nsnap03.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_nsnap04.py b/test/suite/test_nsnap04.py index 8d491540d74..f53c9b5b3cd 100644 --- a/test/suite/test_nsnap04.py +++ b/test/suite/test_nsnap04.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. 
# Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_overwrite.py b/test/suite/test_overwrite.py index c894de99bd0..0e026235302 100644 --- a/test/suite/test_overwrite.py +++ b/test/suite/test_overwrite.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_pack.py b/test/suite/test_pack.py index 9d833f49e16..951c0b0da20 100644 --- a/test/suite/test_pack.py +++ b/test/suite/test_pack.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_perf001.py b/test/suite/test_perf001.py index 6331a3f64d6..4ab958996f0 100644 --- a/test/suite/test_perf001.py +++ b/test/suite/test_perf001.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_readonly01.py b/test/suite/test_readonly01.py index f41280a3283..ee5f78294f4 100644 --- a/test/suite/test_readonly01.py +++ b/test/suite/test_readonly01.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -# Public Domain 2016-2016 MongoDB, Inc. -# Public Domain 2008-2016 WiredTiger, Inc. +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
# diff --git a/test/suite/test_readonly02.py b/test/suite/test_readonly02.py index 0df5465642d..3d3de8186d9 100644 --- a/test/suite/test_readonly02.py +++ b/test/suite/test_readonly02.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -# Public Domain 2016-2016 MongoDB, Inc. -# Public Domain 2008-2016 WiredTiger, Inc. +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. # diff --git a/test/suite/test_readonly03.py b/test/suite/test_readonly03.py index f30c591ca59..6fe2942ca18 100644 --- a/test/suite/test_readonly03.py +++ b/test/suite/test_readonly03.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -# Public Domain 2016-2016 MongoDB, Inc. -# Public Domain 2008-2016 WiredTiger, Inc. +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. # diff --git a/test/suite/test_rebalance.py b/test/suite/test_rebalance.py index 2d160bafec0..867d71b6d35 100644 --- a/test/suite/test_rebalance.py +++ b/test/suite/test_rebalance.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_reconfig01.py b/test/suite/test_reconfig01.py index cbc8bca5740..646b8622a72 100644 --- a/test/suite/test_reconfig01.py +++ b/test/suite/test_reconfig01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/test_reconfig02.py b/test/suite/test_reconfig02.py index 042d3bbe71f..3bdc19fb2f8 100644 --- a/test/suite/test_reconfig02.py +++ b/test/suite/test_reconfig02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_reconfig03.py b/test/suite/test_reconfig03.py index 0019bf4814e..3ab21735bf0 100644 --- a/test/suite/test_reconfig03.py +++ b/test/suite/test_reconfig03.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_reconfig04.py b/test/suite/test_reconfig04.py index 51d9b91c1f4..37288150d35 100644 --- a/test/suite/test_reconfig04.py +++ b/test/suite/test_reconfig04.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_rename.py b/test/suite/test_rename.py index 4e3af8e13e0..0e55a445cd4 100644 --- a/test/suite/test_rename.py +++ b/test/suite/test_rename.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_salvage.py b/test/suite/test_salvage.py index 3b648a7f170..14045afa21e 100644 --- a/test/suite/test_salvage.py +++ b/test/suite/test_salvage.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. 
# Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_schema01.py b/test/suite/test_schema01.py index 52bff7a13ff..983593dad83 100644 --- a/test/suite/test_schema01.py +++ b/test/suite/test_schema01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_schema02.py b/test/suite/test_schema02.py index ffe710b7d3e..e34063aa66b 100644 --- a/test/suite/test_schema02.py +++ b/test/suite/test_schema02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_schema03.py b/test/suite/test_schema03.py index e5a6528914a..e5471a4de73 100644 --- a/test/suite/test_schema03.py +++ b/test/suite/test_schema03.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_schema04.py b/test/suite/test_schema04.py index 63c638b916c..765040ae73f 100644 --- a/test/suite/test_schema04.py +++ b/test/suite/test_schema04.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/test_schema05.py b/test/suite/test_schema05.py index d536a629373..f3a75447ee4 100644 --- a/test/suite/test_schema05.py +++ b/test/suite/test_schema05.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_schema06.py b/test/suite/test_schema06.py index e0eec189137..ef8434a1eaa 100644 --- a/test/suite/test_schema06.py +++ b/test/suite/test_schema06.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_schema07.py b/test/suite/test_schema07.py index 3e4b1d28a4d..8de0c477157 100644 --- a/test/suite/test_schema07.py +++ b/test/suite/test_schema07.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_shared_cache01.py b/test/suite/test_shared_cache01.py index c3bd946cc4b..5b348a0ca87 100644 --- a/test/suite/test_shared_cache01.py +++ b/test/suite/test_shared_cache01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_shared_cache02.py b/test/suite/test_shared_cache02.py index 67f9bf7c6b7..c6e5209ff8a 100644 --- a/test/suite/test_shared_cache02.py +++ b/test/suite/test_shared_cache02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. 
+# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_split.py b/test/suite/test_split.py index 411778f21ae..b3de91d3cdb 100644 --- a/test/suite/test_split.py +++ b/test/suite/test_split.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_stat01.py b/test/suite/test_stat01.py index 2b04a3cbcd5..03f0507ced4 100644 --- a/test/suite/test_stat01.py +++ b/test/suite/test_stat01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_stat02.py b/test/suite/test_stat02.py index 45af283ed02..d3bc18cb3e6 100644 --- a/test/suite/test_stat02.py +++ b/test/suite/test_stat02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_stat03.py b/test/suite/test_stat03.py index 7e5cf46ef13..d486cbda0b6 100644 --- a/test/suite/test_stat03.py +++ b/test/suite/test_stat03.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/test_stat04.py b/test/suite/test_stat04.py index b5309efff37..af5f0e282bf 100644 --- a/test/suite/test_stat04.py +++ b/test/suite/test_stat04.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_stat05.py b/test/suite/test_stat05.py index ef4d65e85e4..6478bb5e58c 100644 --- a/test/suite/test_stat05.py +++ b/test/suite/test_stat05.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_stat_log01.py b/test/suite/test_stat_log01.py index 65ce80dfe7d..8f17be042d6 100644 --- a/test/suite/test_stat_log01.py +++ b/test/suite/test_stat_log01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_sweep01.py b/test/suite/test_sweep01.py index 5559190caca..4d11942dc54 100644 --- a/test/suite/test_sweep01.py +++ b/test/suite/test_sweep01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_sweep02.py b/test/suite/test_sweep02.py index cff45e0d2f9..76931ecbfbd 100644 --- a/test/suite/test_sweep02.py +++ b/test/suite/test_sweep02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. 
# # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_sweep03.py b/test/suite/test_sweep03.py index 61078fa96b5..5ff747b1056 100644 --- a/test/suite/test_sweep03.py +++ b/test/suite/test_sweep03.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_truncate01.py b/test/suite/test_truncate01.py index 98b741ba6a4..88d29d8443a 100644 --- a/test/suite/test_truncate01.py +++ b/test/suite/test_truncate01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_truncate02.py b/test/suite/test_truncate02.py index 729825b26d4..06fa6bfc94f 100644 --- a/test/suite/test_truncate02.py +++ b/test/suite/test_truncate02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_truncate03.py b/test/suite/test_truncate03.py index 2b4628950b3..613ab772571 100644 --- a/test/suite/test_truncate03.py +++ b/test/suite/test_truncate03.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/test_txn01.py b/test/suite/test_txn01.py index d4ca2ac8d12..e0030909331 100644 --- a/test/suite/test_txn01.py +++ b/test/suite/test_txn01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_txn02.py b/test/suite/test_txn02.py index 0ab05baea36..f51d69d4a6c 100644 --- a/test/suite/test_txn02.py +++ b/test/suite/test_txn02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_txn03.py b/test/suite/test_txn03.py index 18a0e096767..53e9b8e6206 100644 --- a/test/suite/test_txn03.py +++ b/test/suite/test_txn03.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_txn04.py b/test/suite/test_txn04.py index d8f6774ded1..470e37d6a9c 100644 --- a/test/suite/test_txn04.py +++ b/test/suite/test_txn04.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_txn05.py b/test/suite/test_txn05.py index c1902404f8d..69bb9611262 100644 --- a/test/suite/test_txn05.py +++ b/test/suite/test_txn05.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. 
# # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_txn06.py b/test/suite/test_txn06.py index c91dc6a623b..520c25f9b86 100644 --- a/test/suite/test_txn06.py +++ b/test/suite/test_txn06.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_txn07.py b/test/suite/test_txn07.py index ffb6ecfbecb..fe1bdd346a0 100644 --- a/test/suite/test_txn07.py +++ b/test/suite/test_txn07.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_txn08.py b/test/suite/test_txn08.py index 04faed9d45a..5ba0a529a31 100644 --- a/test/suite/test_txn08.py +++ b/test/suite/test_txn08.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_txn09.py b/test/suite/test_txn09.py index 768d714e248..e0710803f36 100644 --- a/test/suite/test_txn09.py +++ b/test/suite/test_txn09.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_txn10.py b/test/suite/test_txn10.py index a4745e60066..d27f83bf2e4 100644 --- a/test/suite/test_txn10.py +++ b/test/suite/test_txn10.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. 
# Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_txn11.py b/test/suite/test_txn11.py index 3c02b1e86e3..4b4db9ce315 100644 --- a/test/suite/test_txn11.py +++ b/test/suite/test_txn11.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_txn12.py b/test/suite/test_txn12.py index 32c058bea85..a0ecfb42bdb 100644 --- a/test/suite/test_txn12.py +++ b/test/suite/test_txn12.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_txn13.py b/test/suite/test_txn13.py index 2bf49486b3a..b9172662da0 100644 --- a/test/suite/test_txn13.py +++ b/test/suite/test_txn13.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_txn14.py b/test/suite/test_txn14.py index f9ccabaab8b..7579bbc8e54 100644 --- a/test/suite/test_txn14.py +++ b/test/suite/test_txn14.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_txn15.py b/test/suite/test_txn15.py index a2bfb626338..762c0613735 100644 --- a/test/suite/test_txn15.py +++ b/test/suite/test_txn15.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. 
+# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_unicode01.py b/test/suite/test_unicode01.py index 0796abf4607..21d6a714268 100644 --- a/test/suite/test_unicode01.py +++ b/test/suite/test_unicode01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_upgrade.py b/test/suite/test_upgrade.py index 4eb6a9e6817..6672daf11d6 100644 --- a/test/suite/test_upgrade.py +++ b/test/suite/test_upgrade.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_util01.py b/test/suite/test_util01.py index 5795bb5b2e6..a181acd5568 100644 --- a/test/suite/test_util01.py +++ b/test/suite/test_util01.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_util02.py b/test/suite/test_util02.py index 7aa24605ed1..59c34e6ef0e 100644 --- a/test/suite/test_util02.py +++ b/test/suite/test_util02.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. 
diff --git a/test/suite/test_util03.py b/test/suite/test_util03.py index ac93d04f799..e5e4f624991 100644 --- a/test/suite/test_util03.py +++ b/test/suite/test_util03.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_util04.py b/test/suite/test_util04.py index d165d350adb..cbfd63b6b65 100644 --- a/test/suite/test_util04.py +++ b/test/suite/test_util04.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_util07.py b/test/suite/test_util07.py index 1175ad8eb13..7d3d6ec5f37 100644 --- a/test/suite/test_util07.py +++ b/test/suite/test_util07.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_util08.py b/test/suite/test_util08.py index 456b68675c6..3c4561da263 100644 --- a/test/suite/test_util08.py +++ b/test/suite/test_util08.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_util09.py b/test/suite/test_util09.py index 4b514401478..3138ea087a2 100644 --- a/test/suite/test_util09.py +++ b/test/suite/test_util09.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. 
# # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_util11.py b/test/suite/test_util11.py index d5d1cda8c39..68cb751d364 100644 --- a/test/suite/test_util11.py +++ b/test/suite/test_util11.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_util12.py b/test/suite/test_util12.py index 3821139f266..6f4638e93aa 100644 --- a/test/suite/test_util12.py +++ b/test/suite/test_util12.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_util13.py b/test/suite/test_util13.py index 7890d4fdb1b..79dc232f5ef 100644 --- a/test/suite/test_util13.py +++ b/test/suite/test_util13.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_verify.py b/test/suite/test_verify.py index 46ae667464a..615b8e278ac 100644 --- a/test/suite/test_verify.py +++ b/test/suite/test_verify.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/test_version.py b/test/suite/test_version.py index 569f181acda..c854a393c2b 100644 --- a/test/suite/test_version.py +++ b/test/suite/test_version.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. 
+# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/wtdataset.py b/test/suite/wtdataset.py index 946b97d995f..9adbf954baa 100644 --- a/test/suite/wtdataset.py +++ b/test/suite/wtdataset.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/wtscenario.py b/test/suite/wtscenario.py index 8576b3ac876..86faea330a3 100644 --- a/test/suite/wtscenario.py +++ b/test/suite/wtscenario.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/wttest.py b/test/suite/wttest.py index e91838544b9..1c95eb355ae 100644 --- a/test/suite/wttest.py +++ b/test/suite/wttest.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/suite/wtthread.py b/test/suite/wtthread.py index 046a915394d..54fc4a1961e 100644 --- a/test/suite/wtthread.py +++ b/test/suite/wtthread.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/syscall/syscall.py b/test/syscall/syscall.py index 59c2f347146..1caa718b4fc 100644 --- a/test/syscall/syscall.py +++ b/test/syscall/syscall.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. 
+# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/test/syscall/wt2336_base/main.c b/test/syscall/wt2336_base/main.c index 22420371dd0..f22af235c19 100644 --- a/test/syscall/wt2336_base/main.c +++ b/test/syscall/wt2336_base/main.c @@ -1,3 +1,31 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + #include #include // TODO diff --git a/test/thread/file.c b/test/thread/file.c index 7a7d16c4cd6..66ee9dd8348 100644 --- a/test/thread/file.c +++ b/test/thread/file.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. 
* Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/thread/rw.c b/test/thread/rw.c index e8a2650ca51..cbbd806c559 100644 --- a/test/thread/rw.c +++ b/test/thread/rw.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/thread/stats.c b/test/thread/stats.c index 839d65e8a4d..3950576a310 100644 --- a/test/thread/stats.c +++ b/test/thread/stats.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/thread/t.c b/test/thread/t.c index d2ed4c74bb7..4b767e7f476 100644 --- a/test/thread/t.c +++ b/test/thread/t.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/thread/thread.h b/test/thread/thread.h index edcb919ec32..86b1b55a30e 100644 --- a/test/thread/thread.h +++ b/test/thread/thread.h @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/utility/misc.c b/test/utility/misc.c index 934dac86a7b..e119fef47f1 100644 --- a/test/utility/misc.c +++ b/test/utility/misc.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/test/utility/parse_opts.c b/test/utility/parse_opts.c index c3eff3360de..e5bd8ce0130 100644 --- a/test/utility/parse_opts.c +++ b/test/utility/parse_opts.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/utility/test_util.h b/test/utility/test_util.h index 406ed2c4961..66746c794e8 100644 --- a/test/utility/test_util.h +++ b/test/utility/test_util.h @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/utility/thread.c b/test/utility/thread.c index 122ad554442..08f49c54c5e 100644 --- a/test/utility/thread.c +++ b/test/utility/thread.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/windows/windows_shim.c b/test/windows/windows_shim.c index b161b29c2fa..33980260dc6 100644 --- a/test/windows/windows_shim.c +++ b/test/windows/windows_shim.c @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. diff --git a/test/windows/windows_shim.h b/test/windows/windows_shim.h index 8985904fb19..d3950ba9a18 100644 --- a/test/windows/windows_shim.h +++ b/test/windows/windows_shim.h @@ -1,5 +1,5 @@ /*- - * Public Domain 2014-2016 MongoDB, Inc. + * Public Domain 2014-2017 MongoDB, Inc. * Public Domain 2008-2014 WiredTiger, Inc. * * This is free and unencumbered software released into the public domain. 
diff --git a/test/wtperf/test_conf_dump.py b/test/wtperf/test_conf_dump.py index ef7f276a1d0..bbfb8e819e9 100644 --- a/test/wtperf/test_conf_dump.py +++ b/test/wtperf/test_conf_dump.py @@ -1,3 +1,31 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + # Usage: python test_conf_dump.py # # This script tests if the config file dumped in the test directory corresponds diff --git a/tools/wt_ckpt_decode.py b/tools/wt_ckpt_decode.py index f78bf8c34bf..0d45a652063 100644 --- a/tools/wt_ckpt_decode.py +++ b/tools/wt_ckpt_decode.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. 
# # This is free and unencumbered software released into the public domain. diff --git a/tools/wtstats/test/test_wtstats.py b/tools/wtstats/test/test_wtstats.py index ac730c2fd4d..3d4e9dd1c49 100644 --- a/tools/wtstats/test/test_wtstats.py +++ b/tools/wtstats/test/test_wtstats.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/tools/wtstats/wtstats.py b/tools/wtstats/wtstats.py index bf5557d12f4..7d9e71b0360 100755 --- a/tools/wtstats/wtstats.py +++ b/tools/wtstats/wtstats.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. -- cgit v1.2.1 From 429b6898403ed404dc73cdc9798b6d61b2304631 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 7 Apr 2017 00:45:37 -0400 Subject: WT-3245 Avoid hangs on shutdown when a utility thread encounters an error (#3361) Panic if the log server threads exit unexpectedly. Remove an async worker test for panic -- it doesn't make a difference if an async worker thread exits in the case of a panic, but it doesn't gain us anything either, might as well have the async threads look the same as the other server threads. --- src/async/async_worker.c | 5 ++--- src/conn/conn_log.c | 7 ++++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/async/async_worker.c b/src/async/async_worker.c index 5e96bb84922..2390d9e47cd 100644 --- a/src/async/async_worker.c +++ b/src/async/async_worker.c @@ -301,11 +301,10 @@ __wt_async_worker(void *arg) WT_ERR(__async_op_dequeue(conn, session, &op)); if (op != NULL && op != &async->flush_op) { /* - * If an operation fails, we want the worker thread to - * keep running, unless there is a panic. 
+ * Operation failure doesn't cause the worker thread to + * exit. */ (void)__async_worker_op(session, op, &worker); - WT_ERR(WT_SESSION_CHECK_PANIC(session)); } else if (async->flush_state == WT_ASYNC_FLUSHING) { /* * Worker flushing going on. Last worker to the party diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index a24dd170093..47ba4d45dc3 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -522,7 +522,7 @@ __log_file_server(void *arg) } if (0) { -err: __wt_err(session, ret, "log close server error"); +err: WT_PANIC_MSG(session, ret, "log close server error"); } if (locked) __wt_spin_unlock(session, &log->log_sync_lock); @@ -740,7 +740,8 @@ __log_wrlsn_server(void *arg) WT_ERR(__wt_log_force_write(session, 1, NULL)); __wt_log_wrlsn(session, NULL); if (0) { -err: __wt_err(session, ret, "log wrlsn server error"); +err: WT_PANIC_MSG(session, ret, "log wrlsn server error"); + } return (WT_THREAD_RET_VALUE); } @@ -844,7 +845,7 @@ __log_server(void *arg) } if (0) { -err: __wt_err(session, ret, "log server error"); +err: WT_PANIC_MSG(session, ret, "log server error"); } return (WT_THREAD_RET_VALUE); } -- cgit v1.2.1 From 9f316461e28428350202699c5ba9e336b8770718 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Sat, 8 Apr 2017 00:56:28 +1000 Subject: WT-3262 Don't check if the cache is full when accessing metadata. (#3376) Also don't check for a full cache while holding the table lock (we're likely reading the metadata in that case, just being extra careful). 
--- src/include/cache.i | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/include/cache.i b/src/include/cache.i index 899507ceae6..1e058a3ec1b 100644 --- a/src/include/cache.i +++ b/src/include/cache.i @@ -360,11 +360,13 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool *didworkp) /* * LSM sets the no-cache-check flag when holding the LSM tree lock, in - * that case, or when holding the schema or handle list locks (which - * block eviction), we don't want to highjack the thread for eviction. + * that case, or when holding the handle list, schema or table locks + * (which can block checkpoints and eviction), don't block the thread + * for eviction. */ if (F_ISSET(session, WT_SESSION_NO_EVICTION | - WT_SESSION_LOCKED_HANDLE_LIST_WRITE | WT_SESSION_LOCKED_SCHEMA)) + WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_LOCKED_SCHEMA | + WT_SESSION_LOCKED_TABLE)) return (0); /* In memory configurations don't block when the cache is full. */ @@ -372,11 +374,14 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool *didworkp) return (0); /* - * Threads operating on cache-resident trees are ignored because they're - * not contributing to the problem. + * Threads operating on cache-resident trees are ignored because + * they're not contributing to the problem. We also don't block while + * reading metadata because we're likely to be holding some other + * resources that could block checkpoints or eviction. */ btree = S2BT_SAFE(session); - if (btree != NULL && F_ISSET(btree, WT_BTREE_IN_MEMORY)) + if (btree != NULL && (F_ISSET(btree, WT_BTREE_IN_MEMORY) || + WT_IS_METADATA(session->dhandle))) return (0); /* Check if eviction is needed. */ -- cgit v1.2.1 From 84e6ac0e67019bba22af87b99b40bb0bc0e21157 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Sat, 8 Apr 2017 00:58:05 +1000 Subject: WT-3265 Allow eviction of recently split pages when tree is locked. 
(#3372) When pages split in WiredTiger, internal pages cannot be evicted immediately because there is a chance that a reader is still looking at an index pointing to the page. We check for this when considering pages for eviction, and assert that we never evict an internal page in an active generation. However, if a page splits and then we try to get exclusive access to the tree (e.g., to verify it), we could fail to evict the tree from cache even though we have guaranteed exclusive access to it. Relax the check on internal pages to allow eviction from trees that are locked exclusive. --- src/include/btree.i | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/include/btree.i b/src/include/btree.i index 5e06dd36ae8..c169ddd1a9a 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1354,8 +1354,13 @@ __wt_page_can_evict( * the original parent page's index, because evicting an internal page * discards its WT_REF array, and a thread traversing the original * parent page index might see a freed WT_REF. + * + * One special case where we know this is safe is if the handle is + * locked exclusive (e.g., when the whole tree is being evicted). In + * that case, no readers can be looking at an old index. */ - if (WT_PAGE_IS_INTERNAL(page) && + if (!F_ISSET(session->dhandle, WT_DHANDLE_EXCLUSIVE) && + WT_PAGE_IS_INTERNAL(page) && page->pg_intl_split_gen >= __wt_gen_oldest(session, WT_GEN_SPLIT)) return (false); -- cgit v1.2.1 From 7a3e2484ec1ced43653cf33f4c68b0ebc8a0ee55 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Sat, 8 Apr 2017 00:56:28 +1000 Subject: WT-3262 Don't check if the cache is full when accessing metadata. (#3376) Also don't check for a full cache while holding the table lock (we're likely reading the metadata in that case, just being extra careful). 
--- src/include/cache.i | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/include/cache.i b/src/include/cache.i index d71978ccf35..90dd1bcdda8 100644 --- a/src/include/cache.i +++ b/src/include/cache.i @@ -360,11 +360,13 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool *didworkp) /* * LSM sets the no-cache-check flag when holding the LSM tree lock, in - * that case, or when holding the schema or handle list locks (which - * block eviction), we don't want to highjack the thread for eviction. + * that case, or when holding the handle list, schema or table locks + * (which can block checkpoints and eviction), don't block the thread + * for eviction. */ if (F_ISSET(session, WT_SESSION_NO_EVICTION | - WT_SESSION_LOCKED_HANDLE_LIST_WRITE | WT_SESSION_LOCKED_SCHEMA)) + WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_LOCKED_SCHEMA | + WT_SESSION_LOCKED_TABLE)) return (0); /* In memory configurations don't block when the cache is full. */ @@ -372,11 +374,14 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool *didworkp) return (0); /* - * Threads operating on cache-resident trees are ignored because they're - * not contributing to the problem. + * Threads operating on cache-resident trees are ignored because + * they're not contributing to the problem. We also don't block while + * reading metadata because we're likely to be holding some other + * resources that could block checkpoints or eviction. */ btree = S2BT_SAFE(session); - if (btree != NULL && F_ISSET(btree, WT_BTREE_IN_MEMORY)) + if (btree != NULL && (F_ISSET(btree, WT_BTREE_IN_MEMORY) || + WT_IS_METADATA(session->dhandle))) return (0); /* Check if eviction is needed. */ -- cgit v1.2.1 From d3ed5e9585a33af75c1c32b65e234bbb97b393b4 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Sat, 8 Apr 2017 00:58:05 +1000 Subject: WT-3265 Allow eviction of recently split pages when tree is locked. 
(#3372) (cherry picked from commit: 84e6ac0e67019bba22af87b99b40bb0bc0e21157) When pages split in WiredTiger, internal pages cannot be evicted immediately because there is a chance that a reader is still looking at an index pointing to the page. We check for this when considering pages for eviction, and assert that we never evict an internal page in an active generation. However, if a page splits and then we try to get exclusive access to the tree (e.g., to verify it), we could fail to evict the tree from cache even though we have guaranteed exclusive access to it. Relax the check on internal pages to allow eviction from trees that are locked exclusive. --- src/include/btree.i | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/include/btree.i b/src/include/btree.i index a4d88d5fda1..1d6fcd6272c 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1354,8 +1354,13 @@ __wt_page_can_evict( * the original parent page's index, because evicting an internal page * discards its WT_REF array, and a thread traversing the original * parent page index might see a freed WT_REF. + * + * One special case where we know this is safe is if the handle is + * locked exclusive (e.g., when the whole tree is being evicted). In + * that case, no readers can be looking at an old index. */ - if (WT_PAGE_IS_INTERNAL(page) && !__wt_split_obsolete( + if (!F_ISSET(session->dhandle, WT_DHANDLE_EXCLUSIVE) && + WT_PAGE_IS_INTERNAL(page) && !__wt_split_obsolete( session, page->pg_intl_split_gen)) return (false); -- cgit v1.2.1 From ab281e0a4f312f99494cdb381264196278c18a2f Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 7 Apr 2017 16:55:32 -0400 Subject: WT-3269 Miscellaneous cleanup (#3377) I missed two copyright notices when updating from 2016 to 2017. * Cleanup, one of the temporary copyright files wasn't being removed on exit. * Don't use S2C/S2BT if there's already a local conn/btree variable. * Fix some indentation, minor whitespace. 
* clang warning: Value stored to 'pgs_evicted_cur' is never read * __wt_thread_group_start_one and __wt_thread_group_stop_one no longer return errors, clean up error handling and comments in __evict_tune_workers, and __evict_tune_workers itself no longer returns an error. * Give test/syscall/wt2336_base/base.run a standard copyright notice, so dist/s_copyright doesn't have to treat it as a special case. --- dist/s_c_test_create | 2 +- dist/s_copyright | 7 ++++- ext/compressors/lz4/lz4_compress.c | 4 +-- src/btree/bt_split.c | 2 +- src/cache/cache_las.c | 4 +-- src/cursor/cur_stat.c | 2 +- src/evict/evict_lru.c | 42 ++++++++------------------ src/include/btree.i | 2 +- src/lsm/lsm_manager.c | 2 +- src/lsm/lsm_tree.c | 2 +- src/reconcile/rec_write.c | 3 +- src/txn/txn_ckpt.c | 2 +- src/txn/txn_recover.c | 2 +- test/checkpoint/workers.c | 2 +- test/format/wts.c | 4 +-- test/salvage/salvage.c | 2 +- test/syscall/wt2336_base/base.run | 61 ++++++++++++++++++++------------------ 17 files changed, 67 insertions(+), 78 deletions(-) diff --git a/dist/s_c_test_create b/dist/s_c_test_create index f4f9eb3ac1f..1d379664e75 100755 --- a/dist/s_c_test_create +++ b/dist/s_c_test_create @@ -35,7 +35,7 @@ mkdir $CSUITE_DIRECTORY/$TEST_NAME (cat <scr_alloc( - wt_api, session, (size_t)prefix.uncompressed_len)) == NULL) + wt_api, session, (size_t)prefix.uncompressed_len)) == NULL) return (ENOMEM); decoded = LZ4_decompress_safe( diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index db3e3f33abf..a5dd5ae9d0c 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -41,7 +41,7 @@ __split_safe_free(WT_SESSION_IMPL *session, { /* We should only call safe free if we aren't pinning the memory. 
*/ WT_ASSERT(session, - __wt_session_gen(session, WT_GEN_SPLIT) != split_gen); + __wt_session_gen(session, WT_GEN_SPLIT) != split_gen); /* * We have swapped something in a page: if we don't have exclusive diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c index 68d7227a762..9e8545453d3 100644 --- a/src/cache/cache_las.c +++ b/src/cache/cache_las.c @@ -393,8 +393,8 @@ err: __wt_buf_free(session, key); * arithmetic is signed, so underflow isn't fatal, but check anyway so * we don't skew low over time. */ - if (remove_cnt > S2C(session)->las_record_cnt) - S2C(session)->las_record_cnt = 0; + if (remove_cnt > conn->las_record_cnt) + conn->las_record_cnt = 0; else if (remove_cnt > 0) (void)__wt_atomic_subi64(&conn->las_record_cnt, remove_cnt); diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c index 1c543023779..0f5b40bee40 100644 --- a/src/cursor/cur_stat.c +++ b/src/cursor/cur_stat.c @@ -478,7 +478,7 @@ __curstat_join_desc(WT_CURSOR_STAT *cst, int slot, const char **resultp) strlen(static_desc) + 1; WT_RET(__wt_realloc(session, NULL, len, &cst->desc_buf)); WT_RET(__wt_snprintf( - cst->desc_buf, len, "join: %s%s", sgrp->desc_prefix, static_desc)); + cst->desc_buf, len, "join: %s%s", sgrp->desc_prefix, static_desc)); *resultp = cst->desc_buf; return (0); } diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 8b003cd099e..1e67c7f22b4 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -15,7 +15,7 @@ static int __evict_lru_walk(WT_SESSION_IMPL *); static int __evict_page(WT_SESSION_IMPL *, bool); static int __evict_pass(WT_SESSION_IMPL *); static int __evict_server(WT_SESSION_IMPL *, bool *); -static int __evict_tune_workers(WT_SESSION_IMPL *session); +static void __evict_tune_workers(WT_SESSION_IMPL *session); static int __evict_walk(WT_SESSION_IMPL *, WT_EVICT_QUEUE *); static int __evict_walk_file( WT_SESSION_IMPL *, WT_EVICT_QUEUE *, u_int, u_int *); @@ -657,7 +657,7 @@ __evict_pass(WT_SESSION_IMPL *session) prev = now; if 
(conn->evict_threads.threads[0]->session == session) - WT_RET(__evict_tune_workers(session)); + __evict_tune_workers(session); /* * Increment the shared read generation. Do this occasionally * even if eviction is not currently required, so that pages @@ -956,13 +956,12 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) * curve. In that case, we will set the number of workers to the best observed * so far and settle into a stable state. */ -static int +static void __evict_tune_workers(WT_SESSION_IMPL *session) { struct timespec current_time; WT_CACHE *cache; WT_CONNECTION_IMPL *conn; - WT_DECL_RET; uint64_t cur_threads, delta_msec, delta_pages, i, target_threads; uint64_t pgs_evicted_cur, pgs_evicted_persec_cur, time_diff; uint32_t thread_surplus; @@ -971,7 +970,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) cache = conn->cache; WT_ASSERT(session, conn->evict_threads.threads[0]->session == session); - pgs_evicted_cur = pgs_evicted_persec_cur = 0; + pgs_evicted_cur = 0; __wt_epoch(session, &current_time); time_diff = WT_TIMEDIFF_SEC(current_time, conn->evict_tune_last_time); @@ -982,7 +981,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) */ if (conn->evict_tune_stable) { if (time_diff < EVICT_FORCE_RETUNE) - return (0); + return; /* * Stable state was reached a long time ago. Let's re-tune. @@ -1012,7 +1011,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) * anything unless enough time has passed since the last * time we have taken any action in this function. */ - return (0); + return; /* * Measure the number of evicted pages so far. Eviction rate correlates @@ -1028,7 +1027,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) * Otherwise, we just record the number of evicted pages and return.
*/ if (conn->evict_tune_pgs_last == 0) - goto err; + goto done; delta_msec = WT_TIMEDIFF_MS(current_time, conn->evict_tune_last_time); delta_pages = pgs_evicted_cur - conn->evict_tune_pgs_last; @@ -1079,11 +1078,6 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_tune_workers_best; for (i = 0; i < thread_surplus; i++) { - /* - * If we get an error, it should be because we - * were unable to acquire the thread group lock. - * Break out of trying. - */ __wt_thread_group_stop_one( session, &conn->evict_threads); WT_STAT_CONN_INCR(session, @@ -1095,7 +1089,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_tune_stable = true; WT_STAT_CONN_SET(session, cache_eviction_active_workers, conn->evict_threads.current_threads); - goto err; + goto done; } } @@ -1118,11 +1112,6 @@ __evict_tune_workers(WT_SESSION_IMPL *session) * Start the new threads. */ for (i = 0; i < (target_threads - cur_threads); ++i) { - /* - * If we get an error, it should be because we were - * unable to acquire the thread group lock. Break out - * of trying. - */ __wt_thread_group_start_one(session, &conn->evict_threads, false); WT_STAT_CONN_INCR(session, @@ -1136,15 +1125,8 @@ __evict_tune_workers(WT_SESSION_IMPL *session) WT_STAT_CONN_SET(session, cache_eviction_active_workers, conn->evict_threads.current_threads); -err: conn->evict_tune_last_time = current_time; +done: conn->evict_tune_last_time = current_time; conn->evict_tune_pgs_last = pgs_evicted_cur; - /* - * If we got an EBUSY trying to acquire the lock just return. - * We can try to tune the workers next time. - */ - if (ret == EBUSY) - ret = 0; - return (ret); } /* @@ -1163,13 +1145,13 @@ __evict_lru_pages(WT_SESSION_IMPL *session, bool is_server) * Reconcile and discard some pages: EBUSY is returned if a page fails * eviction because it's unavailable, continue in that case. 
*/ - while (F_ISSET(S2C(session), WT_CONN_EVICTION_RUN) && ret == 0) + while (F_ISSET(conn, WT_CONN_EVICTION_RUN) && ret == 0) if ((ret = __evict_page(session, is_server)) == EBUSY) ret = 0; /* If a worker thread found the queue empty, pause. */ if (ret == WT_NOTFOUND && !is_server && - F_ISSET(S2C(session), WT_CONN_EVICTION_RUN)) + F_ISSET(conn, WT_CONN_EVICTION_RUN)) __wt_cond_wait( session, conn->evict_threads.wait_cond, 10000, NULL); @@ -1354,7 +1336,7 @@ __evict_walk(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue) bool dhandle_locked, incr; conn = S2C(session); - cache = S2C(session)->cache; + cache = conn->cache; btree = NULL; dhandle = NULL; dhandle_locked = incr = false; diff --git a/src/include/btree.i b/src/include/btree.i index c169ddd1a9a..e89c7809a79 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -424,7 +424,7 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page, bool rewrite) modify = page->modify; /* Update the bytes in-memory to reflect the eviction. 
*/ - __wt_cache_decr_check_uint64(session, &S2BT(session)->bytes_inmem, + __wt_cache_decr_check_uint64(session, &btree->bytes_inmem, page->memory_footprint, "WT_BTREE.bytes_inmem"); __wt_cache_decr_check_uint64(session, &cache->bytes_inmem, page->memory_footprint, "WT_CACHE.bytes_inmem"); diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index d9726ac419b..82a6fc8f86c 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -388,7 +388,7 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) __wt_readlock(session, &conn->dhandle_lock); F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); dhandle_locked = true; - TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) { + TAILQ_FOREACH(lsm_tree, &conn->lsmqh, q) { if (!lsm_tree->active) continue; __wt_epoch(session, &now); diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 115ec2acc8a..d9c7a7d7284 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -499,7 +499,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session, __wt_epoch(session, &lsm_tree->last_flush_ts); /* Now the tree is setup, make it visible to others. */ - TAILQ_INSERT_HEAD(&S2C(session)->lsmqh, lsm_tree, q); + TAILQ_INSERT_HEAD(&conn->lsmqh, lsm_tree, q); if (!exclusive) lsm_tree->active = true; F_SET(lsm_tree, WT_LSM_TREE_OPEN); diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 47194478887..0740239758c 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -4386,8 +4386,7 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_RET(__rec_split_raw(session, r, val->len)); } else if (WT_CHECK_CROSSING_BND(r, val->len)) - WT_RET(__rec_split_crossing_bnd( - session, r, val->len)); + WT_RET(__rec_split_crossing_bnd(session, r, val->len)); /* Copy the value onto the page. 
*/ if (!deleted && !overflow_type && btree->dictionary) diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 50684f1a75a..92dfd9e3887 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -1525,7 +1525,7 @@ err: /* */ if (ret != 0) { btree->modified = true; - S2C(session)->modified = true; + conn->modified = true; } __wt_meta_ckptlist_free(session, &btree->ckpt); diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c index e5d1aed7083..93f5fa5d15e 100644 --- a/src/txn/txn_recover.c +++ b/src/txn/txn_recover.c @@ -441,7 +441,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session) * last checkpoint was done with logging disabled, recovery should not * run. Scan the metadata to figure out the largest file ID. */ - if (!FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_EXISTED) || + if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_EXISTED) || WT_IS_MAX_LSN(&metafile->ckpt_lsn)) { WT_ERR(__recovery_file_scan(&r)); conn->next_file_id = r.max_fileid; diff --git a/test/checkpoint/workers.c b/test/checkpoint/workers.c index a055654df71..520266adf55 100644 --- a/test/checkpoint/workers.c +++ b/test/checkpoint/workers.c @@ -215,7 +215,7 @@ real_worker(void) } } else if (ret == WT_ROLLBACK) { if ((ret = session->rollback_transaction( - session, NULL)) != 0) { + session, NULL)) != 0) { (void)log_print_err( "real_worker:rollback_transaction", ret, 1); goto err; diff --git a/test/format/wts.c b/test/format/wts.c index c5346bd399e..2a8c6f54b06 100644 --- a/test/format/wts.c +++ b/test/format/wts.c @@ -276,8 +276,8 @@ wts_open(const char *home, bool set_api, WT_CONNECTION **connp) if ((ret = conn->load_extension( conn, HELIUM_PATH, helium_config)) != 0) testutil_die(ret, - "WT_CONNECTION.load_extension: %s:%s", - HELIUM_PATH, helium_config); + "WT_CONNECTION.load_extension: %s:%s", + HELIUM_PATH, helium_config); } *connp = conn; } diff --git a/test/salvage/salvage.c b/test/salvage/salvage.c index 2a99d8c4851..c19a529bcb8 100644 --- a/test/salvage/salvage.c +++ 
b/test/salvage/salvage.c @@ -522,7 +522,7 @@ build(int ikey, int ivalue, int cnt) break; case WT_PAGE_ROW_LEAF: testutil_check(__wt_snprintf( - kbuf, sizeof(kbuf), "%010d KEY------", ikey)); + kbuf, sizeof(kbuf), "%010d KEY------", ikey)); key.data = kbuf; key.size = 20; cursor->set_key(cursor, &key); diff --git a/test/syscall/wt2336_base/base.run b/test/syscall/wt2336_base/base.run index 7d2c42ce64e..db455c97474 100644 --- a/test/syscall/wt2336_base/base.run +++ b/test/syscall/wt2336_base/base.run @@ -1,32 +1,35 @@ -// Public Domain 2014-2016 MongoDB, Inc. -// Public Domain 2008-2014 WiredTiger, Inc. -// -// This is free and unencumbered software released into the public domain. -// -// Anyone is free to copy, modify, publish, use, compile, sell, or -// distribute this software, either in source code form or as a compiled -// binary, for any purpose, commercial or non-commercial, and by any -// means. -// -// In jurisdictions that recognize copyright laws, the author or authors -// of this software dedicate any and all copyright interest in the -// software to the public domain. We make this dedication for the benefit -// of the public at large and to the detriment of our heirs and -// successors. We intend this dedication to be an overt act of -// relinquishment in perpetuity of all present and future rights to this -// software under copyright law. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. -// -// base.run -// Command line syscall test runner -// +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. 
+ * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * base.run + * Command line syscall test runner + */ #ifdef __linux__ SYSTEM("Linux"); #define OPEN_EXISTING(name, flags) open(name, flags) -- cgit v1.2.1 From 8f371403f0ccfae0188d7e4c2e6d629ade697b13 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Mon, 10 Apr 2017 23:54:54 +1000 Subject: WT-3271 Prevent integer overflow in eviction tuning. 
(#3379) --- src/evict/evict_lru.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 1e67c7f22b4..7ad9f377809 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -962,9 +962,9 @@ __evict_tune_workers(WT_SESSION_IMPL *session) struct timespec current_time; WT_CACHE *cache; WT_CONNECTION_IMPL *conn; - uint64_t cur_threads, delta_msec, delta_pages, i, target_threads; + uint64_t delta_msec, delta_pages; uint64_t pgs_evicted_cur, pgs_evicted_persec_cur, time_diff; - uint32_t thread_surplus; + int32_t cur_threads, i, target_threads, thread_surplus; conn = S2C(session); cache = conn->cache; @@ -995,8 +995,10 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_tune_workers_best = 0; /* Reduce the number of eviction workers to the minimum */ - thread_surplus = conn->evict_threads.current_threads - - conn->evict_threads_min; + thread_surplus = + (int32_t)conn->evict_threads.current_threads - + (int32_t)conn->evict_threads_min; + for (i = 0; i < thread_surplus; i++) { __wt_thread_group_stop_one( session, &conn->evict_threads); @@ -1054,18 +1056,18 @@ __evict_tune_workers(WT_SESSION_IMPL *session) * settle into a stable state. */ if (conn->evict_tune_num_points >= conn->evict_tune_datapts_needed) { - if ((conn->evict_tune_workers_best == - conn->evict_threads.current_threads) && - (conn->evict_threads.current_threads < - conn->evict_threads_max)) { + if (conn->evict_tune_workers_best == + conn->evict_threads.current_threads && + conn->evict_threads.current_threads < + conn->evict_threads_max) { /* * Keep adding workers. We will check again * at the next check point. 
*/ - conn->evict_tune_datapts_needed += - WT_MIN(EVICT_TUNE_DATAPT_MIN, - (conn->evict_threads_max - - conn->evict_threads.current_threads) / + conn->evict_tune_datapts_needed += WT_MIN( + EVICT_TUNE_DATAPT_MIN, + (conn->evict_threads_max - + conn->evict_threads.current_threads) / EVICT_TUNE_BATCH); } else { /* @@ -1074,8 +1076,8 @@ __evict_tune_workers(WT_SESSION_IMPL *session) * settle into a stable state. */ thread_surplus = - conn->evict_threads.current_threads - - conn->evict_tune_workers_best; + (int32_t)conn->evict_threads.current_threads - + (int32_t)conn->evict_tune_workers_best; for (i = 0; i < thread_surplus; i++) { __wt_thread_group_stop_one( @@ -1105,13 +1107,13 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_threads.current_threads) / EVICT_TUNE_BATCH); if (F_ISSET(cache, WT_CACHE_EVICT_ALL)) { - cur_threads = conn->evict_threads.current_threads; + cur_threads = (int32_t)conn->evict_threads.current_threads; target_threads = WT_MIN(cur_threads + EVICT_TUNE_BATCH, - conn->evict_threads_max); + (int32_t)conn->evict_threads_max); /* * Start the new threads. */ - for (i = 0; i < (target_threads - cur_threads); ++i) { + for (i = cur_threads; i < target_threads; ++i) { __wt_thread_group_start_one(session, &conn->evict_threads, false); WT_STAT_CONN_INCR(session, -- cgit v1.2.1 From cb16839cfbdf338af95bed43ca40979ae6e32f54 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Mon, 10 Apr 2017 23:54:54 +1000 Subject: WT-3271 Prevent integer overflow in eviction tuning. 
(#3379) (cherry picked from: 8f371403f0ccfae0188d7e4c2e6d629ade697b13) --- src/evict/evict_lru.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 3ce35c60f2e..26bbf9f679b 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -934,9 +934,9 @@ __evict_tune_workers(WT_SESSION_IMPL *session) WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; - uint64_t cur_threads, delta_msec, delta_pages, i, target_threads; + uint64_t delta_msec, delta_pages; uint64_t pgs_evicted_cur, pgs_evicted_persec_cur, time_diff; - uint32_t thread_surplus; + int32_t cur_threads, i, target_threads, thread_surplus; conn = S2C(session); cache = conn->cache; @@ -967,8 +967,10 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_tune_workers_best = 0; /* Reduce the number of eviction workers to the minimum */ - thread_surplus = conn->evict_threads.current_threads - - conn->evict_threads_min; + thread_surplus = + (int32_t)conn->evict_threads.current_threads - + (int32_t)conn->evict_threads_min; + for (i = 0; i < thread_surplus; i++) { WT_ERR(__wt_thread_group_stop_one( session, &conn->evict_threads, false)); @@ -1026,18 +1028,18 @@ __evict_tune_workers(WT_SESSION_IMPL *session) * settle into a stable state. */ if (conn->evict_tune_num_points >= conn->evict_tune_datapts_needed) { - if ((conn->evict_tune_workers_best == - conn->evict_threads.current_threads) && - (conn->evict_threads.current_threads < - conn->evict_threads_max)) { + if (conn->evict_tune_workers_best == + conn->evict_threads.current_threads && + conn->evict_threads.current_threads < + conn->evict_threads_max) { /* * Keep adding workers. We will check again * at the next check point. 
*/ - conn->evict_tune_datapts_needed += - WT_MIN(EVICT_TUNE_DATAPT_MIN, - (conn->evict_threads_max - - conn->evict_threads.current_threads) / + conn->evict_tune_datapts_needed += WT_MIN( + EVICT_TUNE_DATAPT_MIN, + (conn->evict_threads_max - + conn->evict_threads.current_threads) / EVICT_TUNE_BATCH); } else { /* @@ -1046,8 +1048,8 @@ __evict_tune_workers(WT_SESSION_IMPL *session) * settle into a stable state. */ thread_surplus = - conn->evict_threads.current_threads - - conn->evict_tune_workers_best; + (int32_t)conn->evict_threads.current_threads - + (int32_t)conn->evict_tune_workers_best; for (i = 0; i < thread_surplus; i++) { /* @@ -1082,13 +1084,13 @@ __evict_tune_workers(WT_SESSION_IMPL *session) conn->evict_threads.current_threads) / EVICT_TUNE_BATCH); if (F_ISSET(cache, WT_CACHE_EVICT_ALL)) { - cur_threads = conn->evict_threads.current_threads; + cur_threads = (int32_t)conn->evict_threads.current_threads; target_threads = WT_MIN(cur_threads + EVICT_TUNE_BATCH, - conn->evict_threads_max); + (int32_t)conn->evict_threads_max); /* * Start the new threads. */ - for (i = 0; i < (target_threads - cur_threads); ++i) { + for (i = cur_threads; i < target_threads; ++i) { /* * If we get an error, it should be because we were * unable to acquire the thread group lock. Break out -- cgit v1.2.1 From 36ee37cf90e5173750287960217f4c72e0aa7c14 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 10 Apr 2017 12:07:18 -0400 Subject: WT-3197 aarch64 CRC32C support fails to compile on non-linux ARM platforms (#3380) getauxval is a Linux system call, don't attempt to call it unless we're running on a Linux system. 
--- src/checksum/arm64/crc32-arm64.c | 2 +- src/checksum/zseries/crc32-s390x.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/checksum/arm64/crc32-arm64.c b/src/checksum/arm64/crc32-arm64.c index 3584b296139..59df504f907 100644 --- a/src/checksum/arm64/crc32-arm64.c +++ b/src/checksum/arm64/crc32-arm64.c @@ -91,7 +91,7 @@ __wt_checksum_hw(const void *chunk, size_t len) void __wt_checksum_init(void) { -#if defined(HAVE_CRC32_HARDWARE) +#if defined(__linux__) && defined(HAVE_CRC32_HARDWARE) unsigned long caps = getauxval(AT_HWCAP); if (caps & HWCAP_CRC32) diff --git a/src/checksum/zseries/crc32-s390x.c b/src/checksum/zseries/crc32-s390x.c index 28b46594220..2af443fec04 100644 --- a/src/checksum/zseries/crc32-s390x.c +++ b/src/checksum/zseries/crc32-s390x.c @@ -100,7 +100,7 @@ __wt_checksum_hw(const void *chunk, size_t len) void __wt_checksum_init(void) { -#if defined(HAVE_CRC32_HARDWARE) +#if defined(__linux__) && defined(HAVE_CRC32_HARDWARE) unsigned long caps = getauxval(AT_HWCAP); if (caps & HWCAP_S390_VX) -- cgit v1.2.1 From 314e21eea8adaf5ac8dc681aa9dbff393cce3b4a Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 10 Apr 2017 14:43:44 -0400 Subject: WT-3197 aarch64 CRC32C support fails to compile on non-linux ARM platforms (#3381) First change was insufficient, need to test __linux__ when including header files. 
--- src/checksum/arm64/crc32-arm64.c | 8 ++++---- src/checksum/zseries/crc32-s390x.c | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/checksum/arm64/crc32-arm64.c b/src/checksum/arm64/crc32-arm64.c index 59df504f907..4316ee3d14e 100644 --- a/src/checksum/arm64/crc32-arm64.c +++ b/src/checksum/arm64/crc32-arm64.c @@ -28,7 +28,7 @@ #include "wt_internal.h" -#if defined(HAVE_CRC32_HARDWARE) +#if defined(__linux__) && defined(HAVE_CRC32_HARDWARE) #include #include @@ -82,7 +82,7 @@ __wt_checksum_hw(const void *chunk, size_t len) return (~crc); } -#endif /* HAVE_CRC32_HARDWARE */ +#endif /* * __wt_checksum_init -- @@ -99,7 +99,7 @@ __wt_checksum_init(void) else __wt_process.checksum = __wt_checksum_sw; -#else /* !HAVE_CRC32_HARDWARE */ +#else __wt_process.checksum = __wt_checksum_sw; -#endif /* HAVE_CRC32_HARDWARE */ +#endif } diff --git a/src/checksum/zseries/crc32-s390x.c b/src/checksum/zseries/crc32-s390x.c index 2af443fec04..ae024391ff7 100644 --- a/src/checksum/zseries/crc32-s390x.c +++ b/src/checksum/zseries/crc32-s390x.c @@ -11,8 +11,7 @@ #include #include -#if defined(HAVE_CRC32_HARDWARE) - +#if defined(__linux__) && defined(HAVE_CRC32_HARDWARE) #include /* RHEL 7 has kernel support, but does not define this constant in the lib c headers. 
*/ @@ -108,7 +107,7 @@ __wt_checksum_init(void) else __wt_process.checksum = __wt_checksum_sw; -#else /* !HAVE_CRC32_HARDWARE */ +#else __wt_process.checksum = __wt_checksum_sw; #endif } -- cgit v1.2.1 From 3556aa5464ccbb6351e6bb22fb1e94493dd34c89 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 11 Apr 2017 01:29:35 -0400 Subject: WT-3063 Reserve records for read-modify-write (#3352) --- dist/api_err.py | 16 ++-- dist/s_define.list | 1 + dist/s_string.ok | 5 + examples/c/ex_all.c | 10 ++ lang/java/java_doc.i | 1 + src/btree/bt_cursor.c | 99 ++++++++++++++------ src/btree/bt_debug.c | 2 + src/btree/bt_read.c | 9 +- src/btree/bt_ret.c | 10 +- src/btree/bt_split.c | 4 +- src/btree/bt_stat.c | 19 +++- src/btree/col_modify.c | 34 +++---- src/btree/row_modify.c | 45 ++++----- src/conn/api_strerror.c | 16 ++-- src/conn/conn_dhandle.c | 4 +- src/cursor/cur_backup.c | 1 + src/cursor/cur_bulk.c | 18 ++-- src/cursor/cur_config.c | 1 + src/cursor/cur_ds.c | 9 +- src/cursor/cur_dump.c | 1 + src/cursor/cur_file.c | 70 +++++++++----- src/cursor/cur_index.c | 5 +- src/cursor/cur_join.c | 3 + src/cursor/cur_log.c | 1 + src/cursor/cur_metadata.c | 5 +- src/cursor/cur_stat.c | 7 +- src/cursor/cur_std.c | 9 +- src/cursor/cur_table.c | 59 ++++++++++-- src/include/btmem.h | 18 ++-- src/include/cursor.h | 54 +---------- src/include/cursor.i | 118 ++++++++++++++++++----- src/include/extern.h | 11 ++- src/include/misc.i | 18 ++++ src/include/txn.i | 12 ++- src/include/wiredtiger.in | 17 ++++ src/lsm/lsm_cursor.c | 87 ++++++++++++----- src/reconcile/rec_write.c | 9 +- src/schema/schema_truncate.c | 4 +- src/session/session_api.c | 24 ++--- src/session/session_compact.c | 6 +- src/txn/txn.c | 29 ++++-- src/txn/txn_log.c | 26 +++--- test/csuite/scope/main.c | 4 - test/format/ops.c | 8 +- test/suite/test_reserve.py | 211 ++++++++++++++++++++++++++++++++++++++++++ test/suite/wtdataset.py | 10 +- 46 files changed, 810 insertions(+), 320 deletions(-) create mode 100644 
test/suite/test_reserve.py diff --git a/dist/api_err.py b/dist/api_err.py index bd379ac8d70..06887476dbc 100644 --- a/dist/api_err.py +++ b/dist/api_err.py @@ -112,8 +112,6 @@ tfile.write('''/* DO NOT EDIT: automatically built by dist/api_err.py. */ const char * __wt_wiredtiger_error(int error) { -\tconst char *p; - \t/* \t * Check for WiredTiger specific errors. \t */ @@ -125,14 +123,20 @@ for err in errors: tfile.write('\t\treturn ("' + err.name + ': ' + err.desc + '");\n') tfile.write('''\t} +\t/* Windows strerror doesn't support ENOTSUP. */ +\tif (error == ENOTSUP) +\t\treturn ("Operation not supported"); + \t/* -\t * POSIX errors are non-negative integers; check for 0 explicitly incase -\t * the underlying strerror doesn't handle 0, some historically didn't. +\t * Check for 0 in case the underlying strerror doesn't handle it, some +\t * historically didn't. \t */ \tif (error == 0) \t\treturn ("Successful return: 0"); -\tif (error > 0 && (p = strerror(error)) != NULL) -\t\treturn (p); + +\t/* POSIX errors are non-negative integers. 
*/ +\tif (error > 0) +\t\treturn (strerror(error)); \treturn (NULL); } diff --git a/dist/s_define.list b/dist/s_define.list index 8911d888077..b7f124ef18c 100644 --- a/dist/s_define.list +++ b/dist/s_define.list @@ -58,6 +58,7 @@ WT_STAT_INCRV_BASE WT_STAT_WRITE WT_TIMEDIFF_US WT_TRET_ERROR_OK +WT_UPDATE_RESERVED_VALUE WT_WITH_LOCK_NOWAIT WT_WITH_LOCK_WAIT __F diff --git a/dist/s_string.ok b/dist/s_string.ok index 1f7f7d9fd3a..1285d70897e 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -515,11 +515,13 @@ change's changelog chdir checkfmt +checkkey checkpointed checkpointer checkpointing checksum checksums +checkvalue children's chk chmod @@ -938,6 +940,8 @@ nbits nchunks nclr nd +needkey +needvalue negint newbar newfile @@ -957,6 +961,7 @@ noraw notfound notsup notused +novalue nowait nset nsnap diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c index b568d1dd63c..f94863584e8 100644 --- a/examples/c/ex_all.c +++ b/examples/c/ex_all.c @@ -298,6 +298,16 @@ cursor_ops(WT_SESSION *session) /*! [Insert a new record and assign a record number] */ } + { + /*! [Reserve a record] */ + const char *key = "some key"; + ret = session->open_cursor( + session, "table:mytable", NULL, NULL, &cursor); + cursor->set_key(cursor, key); + ret = cursor->reserve(cursor); + /*! [Reserve a record] */ + } + { /*! 
[Update an existing record or insert a new record] */ const char *key = "some key", *value = "some value"; diff --git a/lang/java/java_doc.i b/lang/java/java_doc.i index 3606bed1d69..8088abbf065 100644 --- a/lang/java/java_doc.i +++ b/lang/java/java_doc.i @@ -14,6 +14,7 @@ COPYDOC(__wt_cursor, WT_CURSOR, search_near) COPYDOC(__wt_cursor, WT_CURSOR, insert) COPYDOC(__wt_cursor, WT_CURSOR, update) COPYDOC(__wt_cursor, WT_CURSOR, remove) +COPYDOC(__wt_cursor, WT_CURSOR, reserve) COPYDOC(__wt_cursor, WT_CURSOR, close) COPYDOC(__wt_cursor, WT_CURSOR, reconfigure) COPYDOC(__wt_async_op, WT_ASYNC_OP, get_key) diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 408a00f136d..41701cc0a73 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -365,11 +365,11 @@ __cursor_row_search( * Column-store delete, insert, and update from an application cursor. */ static inline int -__cursor_col_modify( - WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool is_remove) +__cursor_col_modify(WT_SESSION_IMPL *session, + WT_CURSOR_BTREE *cbt, bool is_remove, bool is_reserve) { - return (__wt_col_modify(session, - cbt, cbt->iface.recno, &cbt->iface.value, NULL, is_remove)); + return (__wt_col_modify(session, cbt, + cbt->iface.recno, &cbt->iface.value, NULL, is_remove, is_reserve)); } /* @@ -377,11 +377,11 @@ __cursor_col_modify( * Row-store insert, update and delete from an application cursor. */ static inline int -__cursor_row_modify( - WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool is_remove) +__cursor_row_modify(WT_SESSION_IMPL *session, + WT_CURSOR_BTREE *cbt, bool is_remove, bool is_reserve) { - return (__wt_row_modify(session, - cbt, &cbt->iface.key, &cbt->iface.value, NULL, is_remove)); + return (__wt_row_modify(session, cbt, + &cbt->iface.key, &cbt->iface.value, NULL, is_remove, is_reserve)); } /* @@ -676,8 +676,8 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) */ cbt->compare = 0; ret = btree->type == BTREE_ROW ? 
- __cursor_row_modify(session, cbt, false) : - __cursor_col_modify(session, cbt, false); + __cursor_row_modify(session, cbt, false, false) : + __cursor_col_modify(session, cbt, false, false); if (ret == 0) goto done; @@ -712,7 +712,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) WT_ERR(WT_DUPLICATE_KEY); - ret = __cursor_row_modify(session, cbt, false); + ret = __cursor_row_modify(session, cbt, false, false); } else { /* * Optionally insert a new record (ignoring the application's @@ -735,7 +735,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); (cbt->compare != 0 && __cursor_fix_implicit(btree, cbt)))) WT_ERR(WT_DUPLICATE_KEY); - WT_ERR(__cursor_col_modify(session, cbt, false)); + WT_ERR(__cursor_col_modify(session, cbt, false, false)); if (append_key) cbt->iface.recno = cbt->recno; @@ -891,8 +891,8 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt) */ cbt->compare = 0; ret = btree->type == BTREE_ROW ? - __cursor_row_modify(session, cbt, true) : - __cursor_col_modify(session, cbt, true); + __cursor_row_modify(session, cbt, true, false) : + __cursor_col_modify(session, cbt, true, false); if (ret == 0) goto done; @@ -926,7 +926,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) WT_ERR(WT_NOTFOUND); - ret = __cursor_row_modify(session, cbt, true); + ret = __cursor_row_modify(session, cbt, true, false); } else { WT_ERR(__cursor_col_search(session, cbt, NULL)); @@ -953,7 +953,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); */ cbt->recno = cursor->recno; } else - ret = __cursor_col_modify(session, cbt, true); + ret = __cursor_col_modify(session, cbt, true, false); } err: if (ret == WT_RESTART) { @@ -987,11 +987,11 @@ done: /* } /* - * __wt_btcur_update -- + * __btcur_update -- * Update a record in the tree. 
*/ -int -__wt_btcur_update(WT_CURSOR_BTREE *cbt) +static int +__btcur_update(WT_CURSOR_BTREE *cbt, bool is_reserve) { WT_BTREE *btree; WT_CURFILE_STATE state; @@ -1011,7 +1011,8 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt) if (btree->type == BTREE_ROW) WT_RET(__cursor_size_chk(session, &cursor->key)); - WT_RET(__cursor_size_chk(session, &cursor->value)); + if (!is_reserve) + WT_RET(__cursor_size_chk(session, &cursor->value)); /* It's no longer possible to bulk-load into the tree. */ __cursor_disable_bulk(session, btree); @@ -1033,8 +1034,8 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt) */ cbt->compare = 0; ret = btree->type == BTREE_ROW ? - __cursor_row_modify(session, cbt, false) : - __cursor_col_modify(session, cbt, false); + __cursor_row_modify(session, cbt, false, is_reserve) : + __cursor_col_modify(session, cbt, false, is_reserve); if (ret == 0) goto done; @@ -1070,7 +1071,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) WT_ERR(WT_NOTFOUND); } - ret = __cursor_row_modify(session, cbt, false); + ret = __cursor_row_modify(session, cbt, false, is_reserve); } else { WT_ERR(__cursor_col_search(session, cbt, NULL)); @@ -1089,7 +1090,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); !__cursor_fix_implicit(btree, cbt)) WT_ERR(WT_NOTFOUND); } - ret = __cursor_col_modify(session, cbt, false); + ret = __cursor_col_modify(session, cbt, false, is_reserve); } err: if (ret == WT_RESTART) { @@ -1106,8 +1107,14 @@ err: if (ret == WT_RESTART) { * To make this work, we add a field to the btree cursor to pass back a * pointer to the modify function's allocated update structure. 
*/ -done: if (ret == 0) - WT_TRET(__wt_kv_return(session, cbt, cbt->modify_update)); +done: if (ret == 0) { + if (is_reserve) { + F_CLR(cursor, WT_CURSTD_VALUE_SET); + WT_TRET(__wt_key_return(session, cbt)); + } else + WT_TRET( + __wt_kv_return(session, cbt, cbt->modify_update)); + } if (ret != 0) { WT_TRET(__cursor_reset(cbt)); @@ -1117,6 +1124,38 @@ done: if (ret == 0) return (ret); } +/* + * __wt_btcur_reserve -- + * Reserve a record in the tree. + */ +int +__wt_btcur_reserve(WT_CURSOR_BTREE *cbt) +{ + WT_CURSOR *cursor; + WT_DECL_RET; + bool overwrite; + + cursor = &cbt->iface; + + /* WT_CURSOR.reserve is update-without-overwrite and a special value. */ + overwrite = F_ISSET(cursor, WT_CURSTD_OVERWRITE); + F_CLR(cursor, WT_CURSTD_OVERWRITE); + ret = __btcur_update(cbt, true); + if (overwrite) + F_SET(cursor, WT_CURSTD_OVERWRITE); + return (ret); +} + +/* + * __wt_btcur_update -- + * Update a record in the tree. + */ +int +__wt_btcur_update(WT_CURSOR_BTREE *cbt) +{ + return (__btcur_update(cbt, false)); +} + /* * __wt_btcur_compare -- * Return a comparison between two cursors. 
@@ -1237,7 +1276,7 @@ __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp) static int __cursor_truncate(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, - int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, bool)) + int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, bool, bool)) { WT_DECL_RET; @@ -1265,7 +1304,7 @@ retry: WT_RET(__wt_btcur_search(start)); F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); for (;;) { - if ((ret = rmfunc(session, start, 1)) != 0) + if ((ret = rmfunc(session, start, true, false)) != 0) break; if (stop != NULL && __cursor_equals(start, stop)) @@ -1292,7 +1331,7 @@ retry: WT_RET(__wt_btcur_search(start)); static int __cursor_truncate_fix(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, - int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, bool)) + int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, bool, bool)) { WT_DECL_RET; const uint8_t *value; @@ -1323,7 +1362,7 @@ retry: WT_RET(__wt_btcur_search(start)); for (;;) { value = (const uint8_t *)start->iface.value.data; if (*value != 0 && - (ret = rmfunc(session, start, 1)) != 0) + (ret = rmfunc(session, start, true, false)) != 0) break; if (stop != NULL && __cursor_equals(start, stop)) diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index 60e208c171c..538c363a864 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -987,6 +987,8 @@ __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte) for (; upd != NULL; upd = upd->next) if (WT_UPDATE_DELETED_ISSET(upd)) WT_RET(ds->f(ds, "\tvalue {deleted}\n")); + else if (WT_UPDATE_RESERVED_ISSET(upd)) + WT_RET(ds->f(ds, "\tvalue {reserved}\n")); else if (hexbyte) { WT_RET(ds->f(ds, "\t{")); WT_RET(__debug_hex_byte(ds, diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index 8de0d916095..fb69afb166c 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -90,7 +90,7 @@ __col_instantiate(WT_SESSION_IMPL *session, { /* Search the 
page and add updates. */ WT_RET(__wt_col_search(session, recno, ref, cbt)); - WT_RET(__wt_col_modify(session, cbt, recno, NULL, upd, false)); + WT_RET(__wt_col_modify(session, cbt, recno, NULL, upd, false, false)); return (0); } @@ -104,7 +104,7 @@ __row_instantiate(WT_SESSION_IMPL *session, { /* Search the page and add updates. */ WT_RET(__wt_row_search(session, key, ref, cbt, true)); - WT_RET(__wt_row_modify(session, cbt, key, NULL, upd, false)); + WT_RET(__wt_row_modify(session, cbt, key, NULL, upd, false, false)); return (0); } @@ -189,9 +189,8 @@ __las_page_instantiate(WT_SESSION_IMPL *session, /* Allocate the WT_UPDATE structure. */ WT_ERR(cursor->get_value( cursor, &upd_txnid, &upd_size, las_value)); - WT_ERR(__wt_update_alloc(session, - (upd_size == WT_UPDATE_DELETED_VALUE) ? NULL : las_value, - &upd, &incr)); + WT_ERR(__wt_update_alloc(session, las_value, + &upd, &incr, upd_size == WT_UPDATE_DELETED_VALUE, false)); total_incr += incr; upd->txnid = upd_txnid; diff --git a/src/btree/bt_ret.c b/src/btree/bt_ret.c index a761c08eee6..7212de72d6e 100644 --- a/src/btree/bt_ret.c +++ b/src/btree/bt_ret.c @@ -147,9 +147,13 @@ __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) cursor = &cbt->iface; /* - * We may already have an internal key, in which case the cursor may - * not be set up to get another copy (for example, when we rely on a - * search-function result). + * We may already have an internal key and the cursor may not be set up + * to get another copy, so we have to leave it alone. Consider a cursor + * search followed by an update: the update doesn't repeat the search, + * it simply updates the currently referenced key's value. We will end + * up here with the correct internal key, but we can't "return" the key + * again even if we wanted to do the additional work, the cursor isn't + * set up for that because we didn't just complete a search. 
*/ F_CLR(cursor, WT_CURSTD_KEY_EXT); if (!F_ISSET(cursor, WT_CURSTD_KEY_INT)) { diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index a5dd5ae9d0c..23210a556da 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -1425,7 +1425,7 @@ __split_multi_inmem( /* Apply the modification. */ WT_ERR(__wt_col_modify( - session, &cbt, recno, NULL, upd, false)); + session, &cbt, recno, NULL, upd, false, false)); break; case WT_PAGE_ROW_LEAF: /* Build a key. */ @@ -1447,7 +1447,7 @@ __split_multi_inmem( /* Apply the modification. */ WT_ERR(__wt_row_modify( - session, &cbt, key, NULL, upd, false)); + session, &cbt, key, NULL, upd, false, false)); break; WT_ILLEGAL_VALUE_ERR(session); } diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c index aba5b1349c5..2b9c9bef8a2 100644 --- a/src/btree/bt_stat.c +++ b/src/btree/bt_stat.c @@ -178,6 +178,8 @@ __stat_page_col_var( */ WT_SKIP_FOREACH(ins, WT_COL_UPDATE(page, cip)) { upd = ins->upd; + if (WT_UPDATE_RESERVED_ISSET(upd)) + continue; if (WT_UPDATE_DELETED_ISSET(upd)) { if (!orig_deleted) { ++deleted_cnt; @@ -192,11 +194,14 @@ __stat_page_col_var( } /* Walk any append list. */ - WT_SKIP_FOREACH(ins, WT_COL_APPEND(page)) + WT_SKIP_FOREACH(ins, WT_COL_APPEND(page)) { + if (WT_UPDATE_RESERVED_ISSET(ins->upd)) + continue; if (WT_UPDATE_DELETED_ISSET(ins->upd)) ++deleted_cnt; else ++entry_cnt; + } WT_STAT_INCRV(session, stats, btree_column_deleted, deleted_cnt); WT_STAT_INCRV(session, stats, btree_column_rle, rle_cnt); @@ -263,7 +268,8 @@ __stat_page_row_leaf( * key on the page. 
*/ WT_SKIP_FOREACH(ins, WT_ROW_INSERT_SMALLEST(page)) - if (!WT_UPDATE_DELETED_ISSET(ins->upd)) + if (!WT_UPDATE_DELETED_ISSET(ins->upd) && + !WT_UPDATE_RESERVED_ISSET(ins->upd)) ++entry_cnt; /* @@ -272,16 +278,19 @@ __stat_page_row_leaf( */ WT_ROW_FOREACH(page, rip, i) { upd = WT_ROW_UPDATE(page, rip); - if (upd == NULL || !WT_UPDATE_DELETED_ISSET(upd)) + if (upd == NULL || + (!WT_UPDATE_DELETED_ISSET(upd) && + !WT_UPDATE_RESERVED_ISSET(upd))) ++entry_cnt; if (upd == NULL && (cell = __wt_row_leaf_value_cell(page, rip, NULL)) != NULL && __wt_cell_type(cell) == WT_CELL_VALUE_OVFL) - ++ovfl_cnt; + ++ovfl_cnt; /* Walk K/V pairs inserted after the on-page K/V pair. */ WT_SKIP_FOREACH(ins, WT_ROW_INSERT(page, rip)) - if (!WT_UPDATE_DELETED_ISSET(ins->upd)) + if (!WT_UPDATE_DELETED_ISSET(ins->upd) && + !WT_UPDATE_RESERVED_ISSET(ins->upd)) ++entry_cnt; } diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c index 5b4ce9a86ad..b45f369f1c2 100644 --- a/src/btree/col_modify.c +++ b/src/btree/col_modify.c @@ -17,13 +17,14 @@ static int __col_insert_alloc( */ int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, - uint64_t recno, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove) + uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, + bool is_remove, bool is_reserve) { + static const WT_ITEM col_fix_remove = { "", 1, NULL, 0, 0 }; WT_BTREE *btree; WT_DECL_RET; WT_INSERT *ins; WT_INSERT_HEAD *ins_head, **ins_headp; - WT_ITEM _value; WT_PAGE *page; WT_PAGE_MODIFY *mod; WT_UPDATE *old_upd, *upd; @@ -37,14 +38,15 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, upd = upd_arg; append = logged = false; - /* This code expects a remove to have a NULL value. */ - if (is_remove) { - if (btree->type == BTREE_COL_FIX) { - value = &_value; - value->data = ""; - value->size = 1; - } else - value = NULL; + if (is_remove || is_reserve) { + /* + * Fixed-size column-store doesn't have on-page deleted values, + * it's a nul byte. 
+ */ + if (is_remove && btree->type == BTREE_COL_FIX) { + is_remove = false; + value = &col_fix_remove; + } } else { /* * There's some chance the application specified a record past @@ -83,11 +85,11 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ASSERT(session, upd_arg == NULL); /* Make sure the update can proceed. */ - WT_ERR(__wt_txn_update_check( - session, old_upd = cbt->ins->upd)); + WT_ERR(__wt_txn_update_check(session, old_upd = cbt->ins->upd)); /* Allocate a WT_UPDATE structure and transaction ID. */ - WT_ERR(__wt_update_alloc(session, value, &upd, &upd_size)); + WT_ERR(__wt_update_alloc(session, + value, &upd, &upd_size, is_remove, is_reserve)); WT_ERR(__wt_txn_modify(session, upd)); logged = true; @@ -147,8 +149,8 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, mod->mod_col_split_recno > recno)); if (upd_arg == NULL) { - WT_ERR( - __wt_update_alloc(session, value, &upd, &upd_size)); + WT_ERR(__wt_update_alloc(session, + value, &upd, &upd_size, is_remove, is_reserve)); WT_ERR(__wt_txn_modify(session, upd)); logged = true; @@ -193,7 +195,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, } /* If the update was successful, add it to the in-memory log. 
*/ - if (logged) + if (logged && !is_reserve) WT_ERR(__wt_txn_log_op(session, cbt)); if (0) { diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c index 2d1e4902836..d3b087f92c6 100644 --- a/src/btree/row_modify.c +++ b/src/btree/row_modify.c @@ -47,7 +47,8 @@ __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) */ int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, - WT_ITEM *key, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove) + const WT_ITEM *key, const WT_ITEM *value, + WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) { WT_DECL_RET; WT_INSERT *ins; @@ -65,10 +66,6 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, upd = upd_arg; logged = false; - /* This code expects a remove to have a NULL value. */ - if (is_remove) - value = NULL; - /* If we don't yet have a modify structure, we'll need one. */ WT_RET(__wt_page_modify_init(session, page)); mod = page->modify; @@ -99,8 +96,8 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, session, old_upd = *upd_entry)); /* Allocate a WT_UPDATE structure and transaction ID. 
*/ - WT_ERR( - __wt_update_alloc(session, value, &upd, &upd_size)); + WT_ERR(__wt_update_alloc(session, + value, &upd, &upd_size, is_remove, is_reserve)); WT_ERR(__wt_txn_modify(session, upd)); logged = true; @@ -170,8 +167,8 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, cbt->ins = ins; if (upd_arg == NULL) { - WT_ERR( - __wt_update_alloc(session, value, &upd, &upd_size)); + WT_ERR(__wt_update_alloc(session, + value, &upd, &upd_size, is_remove, is_reserve)); WT_ERR(__wt_txn_modify(session, upd)); logged = true; @@ -210,7 +207,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, &ins, ins_size, skipdepth)); } - if (logged) + if (logged && !is_reserve) WT_ERR(__wt_txn_log_op(session, cbt)); if (0) { @@ -235,7 +232,7 @@ err: /* */ int __wt_row_insert_alloc(WT_SESSION_IMPL *session, - WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) + const WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) { WT_INSERT *ins; size_t ins_size; @@ -263,11 +260,10 @@ __wt_row_insert_alloc(WT_SESSION_IMPL *session, * Allocate a WT_UPDATE structure and associated value and fill it in. */ int -__wt_update_alloc( - WT_SESSION_IMPL *session, WT_ITEM *value, WT_UPDATE **updp, size_t *sizep) +__wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, + WT_UPDATE **updp, size_t *sizep, bool is_remove, bool is_reserve) { WT_UPDATE *upd; - size_t size; *updp = NULL; @@ -275,14 +271,19 @@ __wt_update_alloc( * Allocate the WT_UPDATE structure and room for the value, then copy * the value into place. */ - size = value == NULL ? 
0 : value->size; - WT_RET(__wt_calloc(session, 1, sizeof(WT_UPDATE) + size, &upd)); - if (value == NULL) - WT_UPDATE_DELETED_SET(upd); - else { - upd->size = WT_STORE_SIZE(size); - if (size != 0) - memcpy(WT_UPDATE_DATA(upd), value->data, size); + if (is_remove || is_reserve) { + WT_RET(__wt_calloc(session, 1, sizeof(WT_UPDATE), &upd)); + if (is_remove) + WT_UPDATE_DELETED_SET(upd); + if (is_reserve) + WT_UPDATE_RESERVED_SET(upd); + } else { + WT_RET(__wt_calloc( + session, 1, sizeof(WT_UPDATE) + value->size, &upd)); + if (value->size != 0) { + upd->size = WT_STORE_SIZE(value->size); + memcpy(WT_UPDATE_DATA(upd), value->data, value->size); + } } *updp = upd; diff --git a/src/conn/api_strerror.c b/src/conn/api_strerror.c index edb11957556..63f982deb07 100644 --- a/src/conn/api_strerror.c +++ b/src/conn/api_strerror.c @@ -18,8 +18,6 @@ const char * __wt_wiredtiger_error(int error) { - const char *p; - /* * Check for WiredTiger specific errors. */ @@ -42,14 +40,20 @@ __wt_wiredtiger_error(int error) return ("WT_CACHE_FULL: operation would overflow cache"); } + /* Windows strerror doesn't support ENOTSUP. */ + if (error == ENOTSUP) + return ("Operation not supported"); + /* - * POSIX errors are non-negative integers; check for 0 explicitly incase - * the underlying strerror doesn't handle 0, some historically didn't. + * Check for 0 in case the underlying strerror doesn't handle it, some + * historically didn't. */ if (error == 0) return ("Successful return: 0"); - if (error > 0 && (p = strerror(error)) != NULL) - return (p); + + /* POSIX errors are non-negative integers. 
*/ + if (error > 0) + return (strerror(error)); return (NULL); } diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index e342e142039..4b4f4b8bc3f 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -364,8 +364,8 @@ __wt_conn_btree_open( F_SET(dhandle, WT_DHANDLE_OPEN); /* - * Checkpoint handles are read only, so eviction calculations - * based on the number of btrees are better to ignore them. + * Checkpoint handles are read-only, so eviction calculations based on + * the number of btrees are better to ignore them. */ if (dhandle->checkpoint == NULL) ++S2C(session)->open_btree_count; diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c index 52636e6ba61..a30cb6f0e17 100644 --- a/src/cursor/cur_backup.c +++ b/src/cursor/cur_backup.c @@ -121,6 +121,7 @@ __wt_curbackup_open(WT_SESSION_IMPL *session, __wt_cursor_notsup, /* insert */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ + __wt_cursor_notsup, /* reserve */ __wt_cursor_reconfigure_notsup, /* reconfigure */ __curbackup_close); /* close */ WT_CURSOR *cursor; diff --git a/src/cursor/cur_bulk.c b/src/cursor/cur_bulk.c index b9c190cc962..56bcbb741f7 100644 --- a/src/cursor/cur_bulk.c +++ b/src/cursor/cur_bulk.c @@ -58,11 +58,11 @@ __curbulk_insert_fix(WT_CURSOR *cursor) if (F_ISSET(cursor, WT_CURSTD_APPEND)) recno = cbulk->recno + 1; else { - WT_CURSOR_CHECKKEY(cursor); + WT_ERR(__cursor_checkkey(cursor)); if ((recno = cursor->recno) <= cbulk->recno) WT_ERR(__bulk_col_keycmp_err(cbulk)); } - WT_CURSOR_CHECKVALUE(cursor); + WT_ERR(__cursor_checkvalue(cursor)); /* * Insert any skipped records as deleted records, update the current @@ -101,7 +101,7 @@ __curbulk_insert_fix_bitmap(WT_CURSOR *cursor) CURSOR_API_CALL(cursor, session, insert, btree); WT_STAT_DATA_INCR(session, cursor_insert_bulk); - WT_CURSOR_CHECKVALUE(cursor); + WT_ERR(__cursor_checkvalue(cursor)); /* Insert the current record. 
*/ ret = __wt_bulk_insert_fix_bitmap(session, cbulk); @@ -140,11 +140,11 @@ __curbulk_insert_var(WT_CURSOR *cursor) if (F_ISSET(cursor, WT_CURSTD_APPEND)) recno = cbulk->recno + 1; else { - WT_CURSOR_CHECKKEY(cursor); + WT_ERR(__cursor_checkkey(cursor)); if ((recno = cursor->recno) <= cbulk->recno) WT_ERR(__bulk_col_keycmp_err(cbulk)); } - WT_CURSOR_CHECKVALUE(cursor); + WT_ERR(__cursor_checkvalue(cursor)); if (!cbulk->first_insert) { /* @@ -241,8 +241,8 @@ __curbulk_insert_row(WT_CURSOR *cursor) CURSOR_API_CALL(cursor, session, insert, btree); WT_STAT_DATA_INCR(session, cursor_insert_bulk); - WT_CURSOR_CHECKKEY(cursor); - WT_CURSOR_CHECKVALUE(cursor); + WT_ERR(__cursor_checkkey(cursor)); + WT_ERR(__cursor_checkvalue(cursor)); /* * If this isn't the first key inserted, compare it against the last key @@ -288,8 +288,8 @@ __curbulk_insert_row_skip_check(WT_CURSOR *cursor) CURSOR_API_CALL(cursor, session, insert, btree); WT_STAT_DATA_INCR(session, cursor_insert_bulk); - WT_CURSOR_CHECKKEY(cursor); - WT_CURSOR_CHECKVALUE(cursor); + WT_ERR(__cursor_checkkey(cursor)); + WT_ERR(__cursor_checkvalue(cursor)); ret = __wt_bulk_insert_row(session, cbulk); diff --git a/src/cursor/cur_config.c b/src/cursor/cur_config.c index 8a88bb5449c..a0b87b2b3c6 100644 --- a/src/cursor/cur_config.c +++ b/src/cursor/cur_config.c @@ -41,6 +41,7 @@ __wt_curconfig_open(WT_SESSION_IMPL *session, __wt_cursor_notsup, /* insert */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ + __wt_cursor_notsup, /* reserve */ __wt_cursor_reconfigure_notsup, /* reconfigure */ __curconfig_close); WT_CURSOR_CONFIG *cconfig; diff --git a/src/cursor/cur_ds.c b/src/cursor/cur_ds.c index 9615d25ae18..e40cb30dd53 100644 --- a/src/cursor/cur_ds.c +++ b/src/cursor/cur_ds.c @@ -42,7 +42,7 @@ __curds_key_set(WT_CURSOR *cursor) source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source; - WT_CURSOR_NEEDKEY(cursor); + WT_ERR(__cursor_needkey(cursor)); source->recno = cursor->recno; source->key.data = 
cursor->key.data; @@ -63,7 +63,7 @@ __curds_value_set(WT_CURSOR *cursor) source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source; - WT_CURSOR_NEEDVALUE(cursor); + WT_ERR(__cursor_needvalue(cursor)); source->value.data = cursor->value.data; source->value.size = cursor->value.size; @@ -142,8 +142,8 @@ __curds_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) WT_ERR_MSG(session, EINVAL, "Cursors must reference the same object"); - WT_CURSOR_NEEDKEY(a); - WT_CURSOR_NEEDKEY(b); + WT_ERR(__cursor_needkey(a)); + WT_ERR(__cursor_needkey(b)); if (WT_CURSOR_RECNO(a)) { if (a->recno < b->recno) @@ -460,6 +460,7 @@ __wt_curds_open( __curds_insert, /* insert */ __curds_update, /* update */ __curds_remove, /* remove */ + __wt_cursor_notsup, /* reserve */ __wt_cursor_reconfigure_notsup, /* reconfigure */ __curds_close); /* close */ WT_CONFIG_ITEM cval, metadata; diff --git a/src/cursor/cur_dump.c b/src/cursor/cur_dump.c index 2fea6c5a60e..73328da6246 100644 --- a/src/cursor/cur_dump.c +++ b/src/cursor/cur_dump.c @@ -371,6 +371,7 @@ __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) __curdump_insert, /* insert */ __curdump_update, /* update */ __curdump_remove, /* remove */ + __wt_cursor_notsup, /* reserve */ __wt_cursor_reconfigure_notsup, /* reconfigure */ __curdump_close); /* close */ WT_CURSOR *cursor; diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index 8de33420d17..c43826799cf 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -31,8 +31,8 @@ __curfile_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) WT_ERR_MSG(session, EINVAL, "Cursors must reference the same object"); - WT_CURSOR_CHECKKEY(a); - WT_CURSOR_CHECKKEY(b); + WT_ERR(__cursor_checkkey(a)); + WT_ERR(__cursor_checkkey(b)); ret = __wt_btcur_compare( (WT_CURSOR_BTREE *)a, (WT_CURSOR_BTREE *)b, cmpp); @@ -63,8 +63,8 @@ __curfile_equals(WT_CURSOR *a, WT_CURSOR *b, int *equalp) WT_ERR_MSG(session, EINVAL, "Cursors must reference the same object"); - WT_CURSOR_CHECKKEY(a); - 
WT_CURSOR_CHECKKEY(b); + WT_ERR(__cursor_checkkey(a)); + WT_ERR(__cursor_checkkey(b)); ret = __wt_btcur_equals( (WT_CURSOR_BTREE *)a, (WT_CURSOR_BTREE *)b, equalp); @@ -182,9 +182,7 @@ __curfile_search(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, search, cbt->btree); - - WT_CURSOR_CHECKKEY(cursor); - WT_CURSOR_NOVALUE(cursor); + WT_ERR(__cursor_checkkey(cursor)); WT_ERR(__wt_btcur_search(cbt)); @@ -209,9 +207,7 @@ __curfile_search_near(WT_CURSOR *cursor, int *exact) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_API_CALL(cursor, session, search_near, cbt->btree); - - WT_CURSOR_CHECKKEY(cursor); - WT_CURSOR_NOVALUE(cursor); + WT_ERR(__cursor_checkkey(cursor)); WT_ERR(__wt_btcur_search_near(cbt, exact)); @@ -238,8 +234,8 @@ __curfile_insert(WT_CURSOR *cursor) CURSOR_UPDATE_API_CALL(cursor, session, insert, cbt->btree); if (!F_ISSET(cursor, WT_CURSTD_APPEND)) - WT_CURSOR_CHECKKEY(cursor); - WT_CURSOR_CHECKVALUE(cursor); + WT_ERR(__cursor_checkkey(cursor)); + WT_ERR(__cursor_checkvalue(cursor)); WT_ERR(__wt_btcur_insert(cbt)); @@ -270,9 +266,7 @@ __wt_curfile_insert_check(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, update, cbt->btree); - - WT_CURSOR_CHECKKEY(cursor); - WT_CURSOR_NOVALUE(cursor); + WT_ERR(__cursor_checkkey(cursor)); ret = __wt_btcur_insert_check(cbt); @@ -293,9 +287,8 @@ __curfile_update(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, update, cbt->btree); - - WT_CURSOR_CHECKKEY(cursor); - WT_CURSOR_CHECKVALUE(cursor); + WT_ERR(__cursor_checkkey(cursor)); + WT_ERR(__cursor_checkvalue(cursor)); WT_ERR(__wt_btcur_update(cbt)); @@ -321,9 +314,7 @@ __curfile_remove(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; CURSOR_REMOVE_API_CALL(cursor, session, cbt->btree); - - WT_CURSOR_CHECKKEY(cursor); - WT_CURSOR_NOVALUE(cursor); + WT_ERR(__cursor_checkkey(cursor)); WT_ERR(__wt_btcur_remove(cbt)); @@ -342,6 +333,42 @@ err: 
CURSOR_UPDATE_API_END(session, ret); return (ret); } +/* + * __curfile_reserve -- + * WT_CURSOR->reserve method for the btree cursor type. + */ +static int +__curfile_reserve(WT_CURSOR *cursor) +{ + WT_CURSOR_BTREE *cbt; + WT_DECL_RET; + WT_SESSION_IMPL *session; + + cbt = (WT_CURSOR_BTREE *)cursor; + CURSOR_UPDATE_API_CALL(cursor, session, reserve, cbt->btree); + WT_ERR(__cursor_checkkey(cursor)); + + WT_ERR(__wt_txn_context_check(session, true)); + + WT_ERR(__wt_btcur_reserve(cbt)); + + WT_ASSERT(session, + F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); + WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0); + +err: CURSOR_UPDATE_API_END(session, ret); + + /* + * The application might do a WT_CURSOR.get_value call when we return, + * so we need a value and the underlying functions didn't set one up. + * For various reasons, those functions may not have done a search and + * any previous value in the cursor might race with WT_CURSOR.reserve + * (and in cases like LSM, the reserve never encountered the original + * key). For simplicity, repeat the search here. + */ + return (ret == 0 ? cursor->search(cursor) : ret); +} + /* * __curfile_close -- * WT_CURSOR->close method for the btree cursor type. 
@@ -405,6 +432,7 @@ __curfile_create(WT_SESSION_IMPL *session, __curfile_insert, /* insert */ __curfile_update, /* update */ __curfile_remove, /* remove */ + __curfile_reserve, /* reserve */ __wt_cursor_reconfigure, /* reconfigure */ __curfile_close); /* close */ WT_BTREE *btree; diff --git a/src/cursor/cur_index.c b/src/cursor/cur_index.c index 9e2a19f9709..fcf00e4fa03 100644 --- a/src/cursor/cur_index.c +++ b/src/cursor/cur_index.c @@ -66,8 +66,8 @@ __curindex_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) WT_ERR_MSG(session, EINVAL, "Cursors must reference the same object"); - WT_CURSOR_CHECKKEY(a); - WT_CURSOR_CHECKKEY(b); + WT_ERR(__cursor_checkkey(a)); + WT_ERR(__cursor_checkkey(b)); ret = __wt_compare( session, cindex->index->collator, &a->key, &b->key, cmpp); @@ -451,6 +451,7 @@ __wt_curindex_open(WT_SESSION_IMPL *session, __wt_cursor_notsup, /* insert */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ + __wt_cursor_notsup, /* reserve */ __wt_cursor_reconfigure_notsup, /* reconfigure */ __curindex_close); /* close */ WT_CURSOR_INDEX *cindex; diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index 62069321777..cebf8a7fd6e 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ -34,6 +34,7 @@ static int __curjoin_split_key(WT_SESSION_IMPL *, WT_CURSOR_JOIN *, WT_ITEM *, */ int __wt_curjoin_joined(WT_CURSOR *cursor) + WT_GCC_FUNC_ATTRIBUTE((cold)) { WT_SESSION_IMPL *session; @@ -592,6 +593,7 @@ __curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, __curjoin_extract_insert, /* insert */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ + __wt_cursor_notsup, /* reserve */ __wt_cursor_reconfigure_notsup, /* reconfigure */ __wt_cursor_notsup); /* close */ WT_DECL_RET; @@ -1293,6 +1295,7 @@ __wt_curjoin_open(WT_SESSION_IMPL *session, __wt_cursor_notsup, /* insert */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ + __wt_cursor_notsup, /* reserve */ 
__wt_cursor_reconfigure_notsup, /* reconfigure */ __curjoin_close); /* close */ WT_CURSOR *cursor; diff --git a/src/cursor/cur_log.c b/src/cursor/cur_log.c index 36804213dbf..c8dc44bb392 100644 --- a/src/cursor/cur_log.c +++ b/src/cursor/cur_log.c @@ -344,6 +344,7 @@ __wt_curlog_open(WT_SESSION_IMPL *session, __wt_cursor_notsup, /* insert */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ + __wt_cursor_notsup, /* reserve */ __wt_cursor_reconfigure_notsup, /* reconfigure */ __curlog_close); /* close */ WT_CURSOR *cursor; diff --git a/src/cursor/cur_metadata.c b/src/cursor/cur_metadata.c index 284c823768d..9a38996d4ce 100644 --- a/src/cursor/cur_metadata.c +++ b/src/cursor/cur_metadata.c @@ -13,7 +13,7 @@ * backing metadata table cursor. */ #define WT_MD_CURSOR_NEEDKEY(cursor) do { \ - WT_CURSOR_NEEDKEY(cursor); \ + WT_ERR(__cursor_needkey(cursor)); \ WT_ERR(__wt_buf_set(session, \ &((WT_CURSOR_METADATA *)(cursor))->file_cursor->key, \ (cursor)->key.data, (cursor)->key.size)); \ @@ -22,7 +22,7 @@ } while (0) #define WT_MD_CURSOR_NEEDVALUE(cursor) do { \ - WT_CURSOR_NEEDVALUE(cursor); \ + WT_ERR(__cursor_needvalue(cursor)); \ WT_ERR(__wt_buf_set(session, \ &((WT_CURSOR_METADATA *)(cursor))->file_cursor->value, \ (cursor)->value.data, (cursor)->value.size)); \ @@ -552,6 +552,7 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session, __curmetadata_insert, /* insert */ __curmetadata_update, /* update */ __curmetadata_remove, /* remove */ + __wt_cursor_notsup, /* reserve */ __wt_cursor_reconfigure_notsup, /* reconfigure */ __curmetadata_close); /* close */ WT_CURSOR *cursor; diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c index 0f5b40bee40..0bfe5679677 100644 --- a/src/cursor/cur_stat.c +++ b/src/cursor/cur_stat.c @@ -54,7 +54,7 @@ __curstat_get_key(WT_CURSOR *cursor, ...) 
va_start(ap, cursor); CURSOR_API_CALL(cursor, session, get_key, NULL); - WT_CURSOR_NEEDKEY(cursor); + WT_ERR(__cursor_needkey(cursor)); if (F_ISSET(cursor, WT_CURSTD_RAW)) { WT_ERR(__wt_struct_size( @@ -93,7 +93,7 @@ __curstat_get_value(WT_CURSOR *cursor, ...) va_start(ap, cursor); CURSOR_API_CALL(cursor, session, get_value, NULL); - WT_CURSOR_NEEDVALUE(cursor); + WT_ERR(__cursor_needvalue(cursor)); WT_ERR(cst->stats_desc(cst, WT_STAT_KEY_OFFSET(cst), &desc)); if (F_ISSET(cursor, WT_CURSTD_RAW)) { @@ -287,7 +287,7 @@ __curstat_search(WT_CURSOR *cursor) cst = (WT_CURSOR_STAT *)cursor; CURSOR_API_CALL(cursor, session, search, NULL); - WT_CURSOR_NEEDKEY(cursor); + WT_ERR(__cursor_needkey(cursor)); F_CLR(cursor, WT_CURSTD_VALUE_SET | WT_CURSTD_VALUE_SET); /* Initialize on demand. */ @@ -578,6 +578,7 @@ __wt_curstat_open(WT_SESSION_IMPL *session, __wt_cursor_notsup, /* insert */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ + __wt_cursor_notsup, /* reserve */ __wt_cursor_reconfigure_notsup, /* reconfigure */ __curstat_close); /* close */ WT_CONFIG_ITEM cval, sval; diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c index 692f1aa957b..e42c5c7766e 100644 --- a/src/cursor/cur_std.c +++ b/src/cursor/cur_std.c @@ -136,6 +136,7 @@ __wt_cursor_set_notsup(WT_CURSOR *cursor) cursor->insert = __wt_cursor_notsup; cursor->update = __wt_cursor_notsup; cursor->remove = __wt_cursor_notsup; + cursor->reserve = __wt_cursor_notsup; } /* @@ -275,7 +276,7 @@ __wt_cursor_get_keyv(WT_CURSOR *cursor, uint32_t flags, va_list ap) const char *fmt; CURSOR_API_CALL(cursor, session, get_key, NULL); - if (!F_ISSET(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_KEY_INT)) + if (!F_ISSET(cursor, WT_CURSTD_KEY_SET)) WT_ERR(__wt_cursor_kv_not_set(cursor, true)); if (WT_CURSOR_RECNO(cursor)) { @@ -705,15 +706,17 @@ __wt_cursor_init(WT_CURSOR *cursor, WT_RET(__wt_config_gets_def(session, cfg, "checkpoint", 0, &cval)); if (cval.len != 0) { cursor->insert = __wt_cursor_notsup; - 
cursor->update = __wt_cursor_notsup; cursor->remove = __wt_cursor_notsup; + cursor->reserve = __wt_cursor_notsup; + cursor->update = __wt_cursor_notsup; } else { WT_RET( __wt_config_gets_def(session, cfg, "readonly", 0, &cval)); if (cval.val != 0 || F_ISSET(S2C(session), WT_CONN_READONLY)) { cursor->insert = __wt_cursor_notsup; - cursor->update = __wt_cursor_notsup; cursor->remove = __wt_cursor_notsup; + cursor->reserve = __wt_cursor_notsup; + cursor->update = __wt_cursor_notsup; } } diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c index 78264c4804d..c6514aaac58 100644 --- a/src/cursor/cur_table.c +++ b/src/cursor/cur_table.c @@ -93,6 +93,7 @@ __wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx, __curextract_insert, /* insert */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ + __wt_cursor_notsup, /* reserve */ __wt_cursor_reconfigure_notsup, /* reconfigure */ __wt_cursor_notsup); /* close */ WT_CURSOR_EXTRACTOR extract_cursor; @@ -110,8 +111,7 @@ __wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx, WT_RET(__wt_cursor_get_raw_key(&ctable->iface, &key)); WT_RET(__wt_cursor_get_raw_value(&ctable->iface, &value)); ret = idx->extractor->extract(idx->extractor, - &session->iface, &key, &value, - &extract_cursor.iface); + &session->iface, &key, &value, &extract_cursor.iface); __wt_buf_free(session, &extract_cursor.iface.key); WT_RET(ret); @@ -190,12 +190,13 @@ __wt_curtable_get_value(WT_CURSOR *cursor, ...) 
WT_SESSION_IMPL *session; va_list ap; - va_start(ap, cursor); JOINABLE_CURSOR_API_CALL(cursor, session, get_value, NULL); - WT_ERR(__wt_curtable_get_valuev(cursor, ap)); -err: va_end(ap); - API_END_RET(session, ret); + va_start(ap, cursor); + ret = __wt_curtable_get_valuev(cursor, ap); + va_end(ap); + +err: API_END_RET(session, ret); } /* @@ -323,8 +324,8 @@ __curtable_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) if (strcmp(a->internal_uri, b->internal_uri) != 0) WT_ERR_MSG(session, EINVAL, "comparison method cursors must reference the same object"); - WT_CURSOR_CHECKKEY(WT_CURSOR_PRIMARY(a)); - WT_CURSOR_CHECKKEY(WT_CURSOR_PRIMARY(b)); + WT_ERR(__cursor_checkkey(WT_CURSOR_PRIMARY(a))); + WT_ERR(__cursor_checkkey(WT_CURSOR_PRIMARY(b))); ret = WT_CURSOR_PRIMARY(a)->compare( WT_CURSOR_PRIMARY(a), WT_CURSOR_PRIMARY(b), cmpp); @@ -660,6 +661,47 @@ err: CURSOR_UPDATE_API_END(session, ret); return (ret); } +/* + * __curtable_reserve -- + * WT_CURSOR->reserve method for the table cursor type. + */ +static int +__curtable_reserve(WT_CURSOR *cursor) +{ + WT_CURSOR_TABLE *ctable; + WT_DECL_RET; + WT_SESSION_IMPL *session; + + ctable = (WT_CURSOR_TABLE *)cursor; + JOINABLE_CURSOR_UPDATE_API_CALL(cursor, session, update, NULL); + + /* + * We don't have to open the indices here, but it makes the code similar + * to other cursor functions, and it's odd for a reserve call to succeed + * but the subsequent update fail opening indices. + * + * Check for a transaction before index open, opening the indices will + * start a transaction if one isn't running. + */ + WT_ERR(__wt_txn_context_check(session, true)); + WT_ERR(__curtable_open_indices(ctable)); + + /* Reserve in column groups, ignore indices. */ + APPLY_CG(ctable, reserve); + +err: CURSOR_UPDATE_API_END(session, ret); + + /* + * The application might do a WT_CURSOR.get_value call when we return, + * so we need a value and the underlying functions didn't set one up. 
+ * For various reasons, those functions may not have done a search and + * any previous value in the cursor might race with WT_CURSOR.reserve + * (and in cases like LSM, the reserve never encountered the original + * key). For simplicity, repeat the search here. + */ + return (ret == 0 ? cursor->search(cursor) : ret); +} + /* * __wt_table_range_truncate -- * Truncate of a cursor range, table implementation. @@ -909,6 +951,7 @@ __wt_curtable_open(WT_SESSION_IMPL *session, __curtable_insert, /* insert */ __curtable_update, /* update */ __curtable_remove, /* remove */ + __curtable_reserve, /* reserve */ __wt_cursor_reconfigure, /* reconfigure */ __curtable_close); /* close */ WT_CONFIG_ITEM cval; diff --git a/src/include/btmem.h b/src/include/btmem.h index d29612143ce..9db764fa864 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -913,13 +913,17 @@ WT_PACKED_STRUCT_BEGIN(__wt_update) WT_UPDATE *next; /* forward-linked list */ /* - * We use the maximum size as an is-deleted flag, which means we can't - * store 4GB objects; I'd rather do that than increase the size of this - * structure for a flag bit. + * Use the maximum size and maximum size-1 as is-deleted and is-reserved + * flags (which means we can't store 4GB objects), instead of increasing + * the size of this structure for a flag bit. */ #define WT_UPDATE_DELETED_VALUE UINT32_MAX -#define WT_UPDATE_DELETED_SET(upd) ((upd)->size = WT_UPDATE_DELETED_VALUE) -#define WT_UPDATE_DELETED_ISSET(upd) ((upd)->size == WT_UPDATE_DELETED_VALUE) +#define WT_UPDATE_DELETED_SET(u) ((u)->size = WT_UPDATE_DELETED_VALUE) +#define WT_UPDATE_DELETED_ISSET(u) ((u)->size == WT_UPDATE_DELETED_VALUE) + +#define WT_UPDATE_RESERVED_VALUE (UINT32_MAX - 1) +#define WT_UPDATE_RESERVED_SET(u) ((u)->size = WT_UPDATE_RESERVED_VALUE) +#define WT_UPDATE_RESERVED_ISSET(u) ((u)->size == WT_UPDATE_RESERVED_VALUE) uint32_t size; /* update length */ /* The untyped value immediately follows the WT_UPDATE structure. 
*/ @@ -932,8 +936,8 @@ WT_PACKED_STRUCT_BEGIN(__wt_update) * cache overhead calculation. */ #define WT_UPDATE_MEMSIZE(upd) \ - WT_ALIGN(sizeof(WT_UPDATE) + \ - (WT_UPDATE_DELETED_ISSET(upd) ? 0 : (upd)->size), 32) + WT_ALIGN(sizeof(WT_UPDATE) + (WT_UPDATE_DELETED_ISSET(upd) || \ + WT_UPDATE_RESERVED_ISSET(upd) ? 0 : (upd)->size), 32) }; /* diff --git a/src/include/cursor.h b/src/include/cursor.h index d905197adc1..b044329fbfe 100644 --- a/src/include/cursor.h +++ b/src/include/cursor.h @@ -24,6 +24,7 @@ insert, \ update, \ remove, \ + reserve, \ reconfigure, \ close) \ static const WT_CURSOR n = { \ @@ -45,6 +46,7 @@ insert, \ update, \ remove, \ + reserve, \ close, \ reconfigure, \ { NULL, NULL }, /* TAILQ_ENTRY q */ \ @@ -497,57 +499,5 @@ struct __wt_cursor_table { #define WT_CURSOR_RECNO(cursor) WT_STREQ((cursor)->key_format, "r") -/* - * WT_CURSOR_NEEDKEY, WT_CURSOR_NEEDVALUE -- - * Check if we have a key/value set. There's an additional semantic - * implemented here: if we're pointing into the tree, and about to perform - * a cursor operation, get a local copy of whatever we're referencing in - * the tree, there's an obvious race with the cursor moving and the key or - * value reference, and it's better to solve it here than in the underlying - * data-source layers. - * - * WT_CURSOR_CHECKKEY -- - * Check if a key is set without making a copy. - * - * WT_CURSOR_NOVALUE -- - * Release any cached value before an operation that could update the - * transaction context and free data a value is pointing to. 
- */ -#define WT_CURSOR_CHECKKEY(cursor) do { \ - if (!F_ISSET(cursor, WT_CURSTD_KEY_SET)) \ - WT_ERR(__wt_cursor_kv_not_set(cursor, true)); \ -} while (0) -#define WT_CURSOR_CHECKVALUE(cursor) do { \ - if (!F_ISSET(cursor, WT_CURSTD_VALUE_SET)) \ - WT_ERR(__wt_cursor_kv_not_set(cursor, false)); \ -} while (0) -#define WT_CURSOR_NEEDKEY(cursor) do { \ - if (F_ISSET(cursor, WT_CURSTD_KEY_INT)) { \ - if (!WT_DATA_IN_ITEM(&(cursor)->key)) \ - WT_ERR(__wt_buf_set( \ - (WT_SESSION_IMPL *)(cursor)->session, \ - &(cursor)->key, \ - (cursor)->key.data, (cursor)->key.size)); \ - F_CLR(cursor, WT_CURSTD_KEY_INT); \ - F_SET(cursor, WT_CURSTD_KEY_EXT); \ - } \ - WT_CURSOR_CHECKKEY(cursor); \ -} while (0) -#define WT_CURSOR_NEEDVALUE(cursor) do { \ - if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) { \ - if (!WT_DATA_IN_ITEM(&(cursor)->value)) \ - WT_ERR(__wt_buf_set( \ - (WT_SESSION_IMPL *)(cursor)->session, \ - &(cursor)->value, \ - (cursor)->value.data, (cursor)->value.size));\ - F_CLR(cursor, WT_CURSTD_VALUE_INT); \ - F_SET(cursor, WT_CURSTD_VALUE_EXT); \ - } \ - WT_CURSOR_CHECKVALUE(cursor); \ -} while (0) -#define WT_CURSOR_NOVALUE(cursor) do { \ - F_CLR(cursor, WT_CURSTD_VALUE_INT); \ -} while (0) - #define WT_CURSOR_RAW_OK \ (WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_PRINT | WT_CURSTD_RAW) diff --git a/src/include/cursor.i b/src/include/cursor.i index f65364d304c..4b6c5e74320 100644 --- a/src/include/cursor.i +++ b/src/include/cursor.i @@ -17,6 +17,82 @@ __cursor_set_recno(WT_CURSOR_BTREE *cbt, uint64_t v) cbt->iface.recno = cbt->recno = v; } +/* + * __cursor_novalue -- + * Release any cached value before an operation that could update the + * transaction context and free data a value is pointing to. + */ +static inline void +__cursor_novalue(WT_CURSOR *cursor) +{ + F_CLR(cursor, WT_CURSTD_VALUE_INT); +} + +/* + * __cursor_checkkey -- + * Check if a key is set without making a copy. 
+ */ +static inline int +__cursor_checkkey(WT_CURSOR *cursor) +{ + return (F_ISSET(cursor, WT_CURSTD_KEY_SET) ? + 0 : __wt_cursor_kv_not_set(cursor, true)); +} + +/* + * __cursor_checkvalue -- + * Check if a value is set without making a copy. + */ +static inline int +__cursor_checkvalue(WT_CURSOR *cursor) +{ + return (F_ISSET(cursor, WT_CURSTD_VALUE_SET) ? + 0 : __wt_cursor_kv_not_set(cursor, false)); +} + +/* + * __cursor_needkey -- + * + * Check if we have a key set. There's an additional semantic here: if we're + * pointing into the tree, get a local copy of whatever we're referencing in + * the tree, there's an obvious race with the cursor moving and the reference. + */ +static inline int +__cursor_needkey(WT_CURSOR *cursor) +{ + if (F_ISSET(cursor, WT_CURSTD_KEY_INT)) { + if (!WT_DATA_IN_ITEM(&cursor->key)) + WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session, + &cursor->key, cursor->key.data, cursor->key.size)); + F_CLR(cursor, WT_CURSTD_KEY_INT); + F_SET(cursor, WT_CURSTD_KEY_EXT); + return (0); + } + return (__cursor_checkkey(cursor)); +} + +/* + * __cursor_needvalue -- + * + * Check if we have a value set. There's an additional semantic here: if we're + * pointing into the tree, get a local copy of whatever we're referencing in + * the tree, there's an obvious race with the cursor moving and the reference. + */ +static inline int +__cursor_needvalue(WT_CURSOR *cursor) +{ + if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) { + if (!WT_DATA_IN_ITEM(&cursor->value)) + WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session, + &cursor->value, + cursor->value.data, cursor->value.size)); + F_CLR(cursor, WT_CURSTD_VALUE_INT); + F_SET(cursor, WT_CURSTD_VALUE_EXT); + return (0); + } + return (__cursor_checkvalue(cursor)); +} + /* * __cursor_pos_clear -- * Reset the cursor's location. 
@@ -129,27 +205,24 @@ static inline int __wt_curindex_get_valuev(WT_CURSOR *cursor, va_list ap) { WT_CURSOR_INDEX *cindex; - WT_DECL_RET; WT_ITEM *item; WT_SESSION_IMPL *session; cindex = (WT_CURSOR_INDEX *)cursor; session = (WT_SESSION_IMPL *)cursor->session; - WT_CURSOR_NEEDVALUE(cursor); + WT_RET(__cursor_checkvalue(cursor)); if (F_ISSET(cursor, WT_CURSOR_RAW_OK)) { - ret = __wt_schema_project_merge(session, + WT_RET(__wt_schema_project_merge(session, cindex->cg_cursors, cindex->value_plan, - cursor->value_format, &cursor->value); - if (ret == 0) { - item = va_arg(ap, WT_ITEM *); - item->data = cursor->value.data; - item->size = cursor->value.size; - } + cursor->value_format, &cursor->value)); + item = va_arg(ap, WT_ITEM *); + item->data = cursor->value.data; + item->size = cursor->value.size; } else - ret = __wt_schema_project_out(session, - cindex->cg_cursors, cindex->value_plan, ap); -err: return (ret); + WT_RET(__wt_schema_project_out(session, + cindex->cg_cursors, cindex->value_plan, ap)); + return (0); } /* @@ -161,28 +234,25 @@ __wt_curtable_get_valuev(WT_CURSOR *cursor, va_list ap) { WT_CURSOR *primary; WT_CURSOR_TABLE *ctable; - WT_DECL_RET; WT_ITEM *item; WT_SESSION_IMPL *session; ctable = (WT_CURSOR_TABLE *)cursor; session = (WT_SESSION_IMPL *)cursor->session; primary = *ctable->cg_cursors; - WT_CURSOR_NEEDVALUE(primary); + WT_RET(__cursor_checkvalue(primary)); if (F_ISSET(cursor, WT_CURSOR_RAW_OK)) { - ret = __wt_schema_project_merge(session, + WT_RET(__wt_schema_project_merge(session, ctable->cg_cursors, ctable->plan, - cursor->value_format, &cursor->value); - if (ret == 0) { - item = va_arg(ap, WT_ITEM *); - item->data = cursor->value.data; - item->size = cursor->value.size; - } + cursor->value_format, &cursor->value)); + item = va_arg(ap, WT_ITEM *); + item->data = cursor->value.data; + item->size = cursor->value.size; } else - ret = __wt_schema_project_out(session, - ctable->cg_cursors, ctable->plan, ap); -err: return (ret); + 
WT_RET(__wt_schema_project_out(session, + ctable->cg_cursors, ctable->plan, ap)); + return (0); } /* diff --git a/src/include/extern.h b/src/include/extern.h index 0aed6c1bd1a..ff835bacc56 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -105,6 +105,7 @@ extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_GCC_FUNC extern int __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_remove(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_btcur_reserve(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -179,7 +180,7 @@ extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *b extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); 
extern int __wt_tree_walk_skip(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_row_leaf_key_copy( WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -188,9 +189,9 @@ extern int __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, c extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_row_ikey(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_update_alloc( WT_SESSION_IMPL *session, WT_ITEM *value, WT_UPDATE **updp, size_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, const WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp, size_t *sizep, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_update_obsolete_free( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); @@ -289,7 +290,7 @@ extern int __wt_curfile_next_random(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUT extern int __wt_curfile_insert_check(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curjoin_joined(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_curjoin_joined(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curjoin_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_INDEX *idx, WT_CURSOR *ref_cursor, uint8_t flags, uint8_t range, uint64_t count, uint32_t bloom_bit_count, uint32_t bloom_hash_count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, WT_CURSOR_JSON *json, bool iskey, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/misc.i b/src/include/misc.i index 634cc01f893..78c6dc8a7dd 
100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -177,3 +177,21 @@ __wt_snprintf_len_incr( va_end(ap); return (ret); } + +/* + * __wt_txn_context_check -- + * Complain if a transaction is/isn't running. + */ +static inline int +__wt_txn_context_check(WT_SESSION_IMPL *session, bool requires_txn) +{ + if (requires_txn && !F_ISSET(&session->txn, WT_TXN_RUNNING)) + WT_RET_MSG(session, EINVAL, + "%s: only permitted in a running transaction", + session->name); + if (!requires_txn && F_ISSET(&session->txn, WT_TXN_RUNNING)) + WT_RET_MSG(session, EINVAL, + "%s: not permitted in a running transaction", + session->name); + return (0); +} diff --git a/src/include/txn.i b/src/include/txn.i index 3c096f34b2e..4b6ba17853f 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -69,7 +69,7 @@ __wt_txn_modify(WT_SESSION_IMPL *session, WT_UPDATE *upd) if (F_ISSET(txn, WT_TXN_READONLY)) WT_RET_MSG(session, WT_ROLLBACK, - "Attempt to update in a read only transaction"); + "Attempt to update in a read-only transaction"); WT_RET(__txn_next_op(session, &op)); op->type = F_ISSET(session, WT_SESSION_LOGGING_INMEM) ? @@ -233,8 +233,11 @@ __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id) static inline WT_UPDATE * __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd) { - while (upd != NULL && !__wt_txn_visible(session, upd->txnid)) - upd = upd->next; + /* Skip reserved place-holders, they're never visible. */ + for (; upd != NULL; upd = upd->next) + if (!WT_UPDATE_RESERVED_ISSET(upd) && + __wt_txn_visible(session, upd->txnid)) + break; return (upd); } @@ -449,8 +452,7 @@ __wt_txn_read_last(WT_SESSION_IMPL *session) * snapshot here: it will be restored by WT_WITH_TXN_ISOLATION. 
*/ if ((!F_ISSET(txn, WT_TXN_RUNNING) || - txn->isolation != WT_ISO_SNAPSHOT) && - txn->forced_iso == 0) + txn->isolation != WT_ISO_SNAPSHOT) && txn->forced_iso == 0) __wt_txn_release_snapshot(session); } diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index ec2bdd3b637..31b1c16cd6c 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -504,6 +504,23 @@ struct __wt_cursor { * with the specified key exists, ::WT_NOTFOUND is returned. */ int __F(remove)(WT_CURSOR *cursor); + + /*! + * Reserve an existing record so a subsequent write is less likely to + * fail due to a conflict between concurrent operations. + * + * The key must first be set and the record must already exist. + * + * @snippet ex_all.c Reserve a record + * + * On success, the cursor ends positioned at the specified record; to + * minimize cursor resources, the WT_CURSOR::reset method should be + * called as soon as the cursor no longer needs that position. + * + * @param cursor the cursor handle + * @errors + */ + int __F(reserve)(WT_CURSOR *cursor); /*! @} */ /*! diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 8459259dae7..e9943d24cda 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -486,7 +486,7 @@ __clsm_open_cursors( * cursor, take a copy before closing cursors. 
*/ if (F_ISSET(c, WT_CURSTD_KEY_INT)) - WT_CURSOR_NEEDKEY(c); + WT_ERR(__cursor_needkey(c)); F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV); @@ -844,8 +844,8 @@ __clsm_compare(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) WT_ERR_MSG(session, EINVAL, "comparison method cursors must reference the same object"); - WT_CURSOR_NEEDKEY(a); - WT_CURSOR_NEEDKEY(b); + WT_ERR(__cursor_needkey(a)); + WT_ERR(__cursor_needkey(b)); WT_ERR(__wt_compare( session, alsm->lsm_tree->collator, &a->key, &b->key, cmpp)); @@ -871,7 +871,7 @@ __clsm_next(WT_CURSOR *cursor) clsm = (WT_CURSOR_LSM *)cursor; CURSOR_API_CALL(cursor, session, next, NULL); - WT_CURSOR_NOVALUE(cursor); + __cursor_novalue(cursor); WT_ERR(__clsm_enter(clsm, false, false)); /* If we aren't positioned for a forward scan, get started. */ @@ -997,7 +997,7 @@ __clsm_next_random(WT_CURSOR *cursor) clsm = (WT_CURSOR_LSM *)cursor; CURSOR_API_CALL(cursor, session, next, NULL); - WT_CURSOR_NOVALUE(cursor); + __cursor_novalue(cursor); WT_ERR(__clsm_enter(clsm, false, false)); for (;;) { @@ -1051,7 +1051,7 @@ __clsm_prev(WT_CURSOR *cursor) clsm = (WT_CURSOR_LSM *)cursor; CURSOR_API_CALL(cursor, session, prev, NULL); - WT_CURSOR_NOVALUE(cursor); + __cursor_novalue(cursor); WT_ERR(__clsm_enter(clsm, false, false)); /* If we aren't positioned for a reverse scan, get started. 
*/ @@ -1268,8 +1268,8 @@ __clsm_search(WT_CURSOR *cursor) clsm = (WT_CURSOR_LSM *)cursor; CURSOR_API_CALL(cursor, session, search, NULL); - WT_CURSOR_NEEDKEY(cursor); - WT_CURSOR_NOVALUE(cursor); + WT_ERR(__cursor_needkey(cursor)); + __cursor_novalue(cursor); WT_ERR(__clsm_enter(clsm, true, false)); ret = __clsm_lookup(clsm, &cursor->value); @@ -1301,8 +1301,8 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp) exact = 0; CURSOR_API_CALL(cursor, session, search_near, NULL); - WT_CURSOR_NEEDKEY(cursor); - WT_CURSOR_NOVALUE(cursor); + WT_ERR(__cursor_needkey(cursor)); + __cursor_novalue(cursor); WT_ERR(__clsm_enter(clsm, true, false)); F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV); @@ -1438,11 +1438,12 @@ err: __clsm_leave(clsm); */ static inline int __clsm_put(WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm, - const WT_ITEM *key, const WT_ITEM *value, bool position) + const WT_ITEM *key, const WT_ITEM *value, bool position, bool reserve) { WT_CURSOR *c, *primary; WT_LSM_TREE *lsm_tree; u_int i, slot; + int (*func)(WT_CURSOR *); lsm_tree = clsm->lsm_tree; @@ -1473,8 +1474,12 @@ __clsm_put(WT_SESSION_IMPL *session, WT_CURSOR_LSM *clsm, c = clsm->chunks[slot]->cursor; c->set_key(c, key); - c->set_value(c, value); - WT_RET((position && i == 0) ? c->update(c) : c->insert(c)); + func = c->insert; + if (i == 0 && position) + func = reserve ? 
c->reserve : c->update; + if (func != c->reserve) + c->set_value(c, value); + WT_RET(func(c)); } /* @@ -1521,8 +1526,8 @@ __clsm_insert(WT_CURSOR *cursor) clsm = (WT_CURSOR_LSM *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, insert, NULL); - WT_CURSOR_NEEDKEY(cursor); - WT_CURSOR_NEEDVALUE(cursor); + WT_ERR(__cursor_needkey(cursor)); + WT_ERR(__cursor_needvalue(cursor)); WT_ERR(__clsm_enter(clsm, false, true)); if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && @@ -1533,7 +1538,7 @@ __clsm_insert(WT_CURSOR *cursor) } WT_ERR(__clsm_deleted_encode(session, &cursor->value, &value, &buf)); - WT_ERR(__clsm_put(session, clsm, &cursor->key, &value, false)); + WT_ERR(__clsm_put(session, clsm, &cursor->key, &value, false, false)); /* * WT_CURSOR.insert doesn't leave the cursor positioned, and the @@ -1565,14 +1570,14 @@ __clsm_update(WT_CURSOR *cursor) clsm = (WT_CURSOR_LSM *)cursor; CURSOR_UPDATE_API_CALL(cursor, session, update, NULL); - WT_CURSOR_NEEDKEY(cursor); - WT_CURSOR_NEEDVALUE(cursor); + WT_ERR(__cursor_needkey(cursor)); + WT_ERR(__cursor_needvalue(cursor)); WT_ERR(__clsm_enter(clsm, false, true)); if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) WT_ERR(__clsm_lookup(clsm, &value)); WT_ERR(__clsm_deleted_encode(session, &cursor->value, &value, &buf)); - WT_ERR(__clsm_put(session, clsm, &cursor->key, &value, true)); + WT_ERR(__clsm_put(session, clsm, &cursor->key, &value, true, false)); /* * Set the cursor to reference the internal key/value of the positioned @@ -1612,14 +1617,14 @@ __clsm_remove(WT_CURSOR *cursor) positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT); CURSOR_REMOVE_API_CALL(cursor, session, NULL); - WT_CURSOR_NEEDKEY(cursor); - WT_CURSOR_NOVALUE(cursor); + WT_ERR(__cursor_needkey(cursor)); + __cursor_novalue(cursor); WT_ERR(__clsm_enter(clsm, false, true)); if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) WT_ERR(__clsm_lookup(clsm, &value)); WT_ERR(__clsm_put( - session, clsm, &cursor->key, &__tombstone, positioned)); + session, clsm, &cursor->key, &__tombstone, 
positioned, false)); /* * If the cursor was positioned, it stays positioned with a key but no @@ -1638,6 +1643,43 @@ err: __clsm_leave(clsm); return (ret); } +/* + * __clsm_reserve -- + * WT_CURSOR->reserve method for the LSM cursor type. + */ +static int +__clsm_reserve(WT_CURSOR *cursor) +{ + WT_CURSOR_LSM *clsm; + WT_DECL_RET; + WT_ITEM value; + WT_SESSION_IMPL *session; + + clsm = (WT_CURSOR_LSM *)cursor; + + CURSOR_UPDATE_API_CALL(cursor, session, reserve, NULL); + WT_ERR(__cursor_needkey(cursor)); + __cursor_novalue(cursor); + WT_ERR(__wt_txn_context_check(session, true)); + WT_ERR(__clsm_enter(clsm, false, true)); + + if ((ret = __clsm_lookup(clsm, &value)) == 0) + ret = __clsm_put(session, clsm, &cursor->key, NULL, true, true); + +err: __clsm_leave(clsm); + CURSOR_UPDATE_API_END(session, ret); + + /* + * The application might do a WT_CURSOR.get_value call when we return, + * so we need a value and the underlying functions didn't set one up. + * For various reasons, those functions may not have done a search and + * any previous value in the cursor might race with WT_CURSOR.reserve + * (and in cases like LSM, the reserve never encountered the original + * key). For simplicity, repeat the search here. + */ + return (ret == 0 ? cursor->search(cursor) : ret); +} + /* * __wt_clsm_close -- * WT_CURSOR->close method for the LSM cursor type. @@ -1694,6 +1736,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session, __clsm_insert, /* insert */ __clsm_update, /* update */ __clsm_remove, /* remove */ + __clsm_reserve, /* reserve */ __wt_cursor_reconfigure, /* reconfigure */ __wt_clsm_close); /* close */ WT_CURSOR *cursor; diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 0740239758c..469968c6c4b 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -1179,6 +1179,9 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, } } + /* Reconciliation should never see a reserved update. 
*/ + WT_ASSERT(session, *updp == NULL || !WT_UPDATE_RESERVED_ISSET(*updp)); + /* * If all of the updates were aborted, quit. This test is not strictly * necessary because the above loop exits with skipped not set and the @@ -1351,13 +1354,13 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, */ if (vpack == NULL || vpack->type == WT_CELL_DEL) WT_RET(__wt_update_alloc( - session, NULL, &append, &notused)); + session, NULL, &append, &notused, true, false)); else { WT_RET(__wt_scr_alloc(session, 0, &tmp)); if ((ret = __wt_page_cell_data_ref( session, page, vpack, tmp)) == 0) - ret = __wt_update_alloc( - session, tmp, &append, &notused); + ret = __wt_update_alloc(session, + tmp, &append, &notused, false, false); __wt_scr_free(session, &tmp); WT_RET(ret); } diff --git a/src/schema/schema_truncate.c b/src/schema/schema_truncate.c index 05c6f32551a..b3a69dd5abd 100644 --- a/src/schema/schema_truncate.c +++ b/src/schema/schema_truncate.c @@ -138,9 +138,9 @@ __wt_schema_range_truncate( uri = start->internal_uri; if (WT_PREFIX_MATCH(uri, "file:")) { - WT_CURSOR_NEEDKEY(start); + WT_ERR(__cursor_needkey(start)); if (stop != NULL) - WT_CURSOR_NEEDKEY(stop); + WT_ERR(__cursor_needkey(stop)); WT_WITH_BTREE(session, ((WT_CURSOR_BTREE *)start)->btree, ret = __wt_btcur_range_truncate( (WT_CURSOR_BTREE *)start, (WT_CURSOR_BTREE *)stop)); diff --git a/src/session/session_api.c b/src/session/session_api.c index 73979947f49..21f03915e2a 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -293,8 +293,7 @@ __session_reconfigure(WT_SESSION *wt_session, const char *config) */ WT_UNUSED(cfg); - if (F_ISSET(&session->txn, WT_TXN_RUNNING)) - WT_ERR_MSG(session, EINVAL, "transaction in progress"); + WT_ERR(__wt_txn_context_check(session, false)); WT_ERR(__wt_session_reset_cursors(session, false)); @@ -816,8 +815,7 @@ __session_reset(WT_SESSION *wt_session) SESSION_API_CALL_NOCONF(session, reset); - if (F_ISSET(&session->txn, WT_TXN_RUNNING)) - WT_ERR_MSG(session, EINVAL, 
"transaction in progress"); + WT_ERR(__wt_txn_context_check(session, false)); WT_TRET(__wt_session_reset_cursors(session, true)); @@ -1403,8 +1401,7 @@ __session_begin_transaction(WT_SESSION *wt_session, const char *config) SESSION_API_CALL(session, begin_transaction, config, cfg); WT_STAT_CONN_INCR(session, txn_begin); - if (F_ISSET(&session->txn, WT_TXN_RUNNING)) - WT_ERR_MSG(session, EINVAL, "Transaction already running"); + WT_ERR(__wt_txn_context_check(session, false)); ret = __wt_txn_begin(session, cfg); @@ -1426,6 +1423,8 @@ __session_commit_transaction(WT_SESSION *wt_session, const char *config) SESSION_API_CALL(session, commit_transaction, config, cfg); WT_STAT_CONN_INCR(session, txn_commit); + WT_ERR(__wt_txn_context_check(session, true)); + txn = &session->txn; if (F_ISSET(txn, WT_TXN_ERROR) && txn->mod_count != 0) WT_ERR_MSG(session, EINVAL, @@ -1455,6 +1454,8 @@ __session_rollback_transaction(WT_SESSION *wt_session, const char *config) SESSION_API_CALL(session, rollback_transaction, config, cfg); WT_STAT_CONN_INCR(session, txn_rollback); + WT_ERR(__wt_txn_context_check(session, true)); + WT_TRET(__wt_session_reset_cursors(session, false)); WT_TRET(__wt_txn_rollback(session, cfg)); @@ -1520,7 +1521,6 @@ __session_transaction_sync(WT_SESSION *wt_session, const char *config) WT_DECL_RET; WT_LOG *log; WT_SESSION_IMPL *session; - WT_TXN *txn; struct timespec now, start; uint64_t remaining_usec, timeout_ms, waited_ms; bool forever; @@ -1530,9 +1530,7 @@ __session_transaction_sync(WT_SESSION *wt_session, const char *config) WT_STAT_CONN_INCR(session, txn_sync); conn = S2C(session); - txn = &session->txn; - if (F_ISSET(txn, WT_TXN_RUNNING)) - WT_ERR_MSG(session, EINVAL, "transaction in progress"); + WT_ERR(__wt_txn_context_check(session, false)); /* * If logging is not enabled there is nothing to do. 
@@ -1623,7 +1621,6 @@ __session_checkpoint(WT_SESSION *wt_session, const char *config) { WT_DECL_RET; WT_SESSION_IMPL *session; - WT_TXN *txn; session = (WT_SESSION_IMPL *)wt_session; @@ -1648,10 +1645,7 @@ __session_checkpoint(WT_SESSION *wt_session, const char *config) * from evicting anything newer than this because we track the oldest * transaction ID in the system that is not visible to all readers. */ - txn = &session->txn; - if (F_ISSET(txn, WT_TXN_RUNNING)) - WT_ERR_MSG(session, EINVAL, - "Checkpoint not permitted in a transaction"); + WT_ERR(__wt_txn_context_check(session, false)); ret = __wt_txn_checkpoint(session, cfg, true); diff --git a/src/session/session_compact.c b/src/session/session_compact.c index a22ad6f22ef..30c6ad297f7 100644 --- a/src/session/session_compact.c +++ b/src/session/session_compact.c @@ -316,7 +316,6 @@ __wt_session_compact( WT_DATA_SOURCE *dsrc; WT_DECL_RET; WT_SESSION_IMPL *session; - WT_TXN *txn; u_int i; session = (WT_SESSION_IMPL *)wt_session; @@ -332,10 +331,7 @@ __wt_session_compact( * reason for LSM to allow this, possible or not), and check now so the * error message isn't confusing. */ - txn = &session->txn; - if (F_ISSET(txn, WT_TXN_RUNNING)) - WT_ERR_MSG(session, EINVAL, - "compaction not permitted in a transaction"); + WT_ERR(__wt_txn_context_check(session, false)); /* Disallow objects in the WiredTiger name space. 
*/ WT_ERR(__wt_str_name_check(session, uri)); diff --git a/src/txn/txn.c b/src/txn/txn.c index 55d2e3c2900..ac4be37f855 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -509,10 +509,9 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) txn = &session->txn; conn = S2C(session); did_update = txn->mod_count != 0; - WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || !did_update); - if (!F_ISSET(txn, WT_TXN_RUNNING)) - WT_RET_MSG(session, EINVAL, "No transaction is active"); + WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING)); + WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || !did_update); /* * The default sync setting is inherited from the connection, but can @@ -593,9 +592,26 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) return (ret); } - /* Free memory associated with updates. */ - for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) + for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) { + switch (op->type) { + case WT_TXN_OP_BASIC: + case WT_TXN_OP_INMEM: + /* + * Switch reserved operations to abort to simplify + * obsolete update list truncation. + */ + if (WT_UPDATE_RESERVED_ISSET(op->u.upd)) + op->u.upd->txnid = WT_TXN_ABORTED; + break; + case WT_TXN_OP_REF: + case WT_TXN_OP_TRUNCATE_COL: + case WT_TXN_OP_TRUNCATE_ROW: + break; + } + + /* Free memory associated with updates. */ __wt_txn_op_free(session, op); + } txn->mod_count = 0; __wt_txn_release(session); @@ -617,8 +633,7 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) WT_UNUSED(cfg); txn = &session->txn; - if (!F_ISSET(txn, WT_TXN_RUNNING)) - WT_RET_MSG(session, EINVAL, "No transaction is active"); + WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING)); /* Rollback notification. 
*/ if (txn->notify != NULL) diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c index 4d5f1df6a88..72129692138 100644 --- a/src/txn/txn_log.c +++ b/src/txn/txn_log.c @@ -31,32 +31,32 @@ __txn_op_log(WT_SESSION_IMPL *session, value.data = WT_UPDATE_DATA(upd); value.size = upd->size; + /* We shouldn't be logging reserve operations. */ + WT_ASSERT(session, !WT_UPDATE_RESERVED_ISSET(upd)); + /* - * Log the operation. It must be one of the following: - * 1) column store remove; - * 2) column store insert/update; - * 3) row store remove; or - * 4) row store insert/update. + * Log the operation. It must be a row- or column-store insert, remove + * or update, all of which require log records. */ if (cbt->btree->type == BTREE_ROW) { WT_ERR(__wt_cursor_row_leaf_key(cbt, &key)); if (WT_UPDATE_DELETED_ISSET(upd)) - WT_ERR(__wt_logop_row_remove_pack(session, logrec, - op->fileid, &key)); + WT_ERR(__wt_logop_row_remove_pack( + session, logrec, op->fileid, &key)); else - WT_ERR(__wt_logop_row_put_pack(session, logrec, - op->fileid, &key, &value)); + WT_ERR(__wt_logop_row_put_pack( + session, logrec, op->fileid, &key, &value)); } else { recno = WT_INSERT_RECNO(cbt->ins); WT_ASSERT(session, recno != WT_RECNO_OOB); if (WT_UPDATE_DELETED_ISSET(upd)) - WT_ERR(__wt_logop_col_remove_pack(session, logrec, - op->fileid, recno)); + WT_ERR(__wt_logop_col_remove_pack( + session, logrec, op->fileid, recno)); else - WT_ERR(__wt_logop_col_put_pack(session, logrec, - op->fileid, recno, &value)); + WT_ERR(__wt_logop_col_put_pack( + session, logrec, op->fileid, recno, &value)); } err: __wt_buf_free(session, &key); diff --git a/test/csuite/scope/main.c b/test/csuite/scope/main.c index d2d902f33f9..8b9a79decd0 100644 --- a/test/csuite/scope/main.c +++ b/test/csuite/scope/main.c @@ -75,9 +75,7 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) { "insert", INSERT, NULL, }, { "search", SEARCH, NULL, }, { "search", SEARCH_NEAR, NULL, }, -#if 0 { "reserve", RESERVE, NULL, }, -#endif { "update", 
UPDATE, NULL, }, { "remove", REMOVE, NULL, }, { "remove", REMOVE_POS, NULL, }, @@ -148,9 +146,7 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) testutil_check(cursor->remove(cursor)); break; case RESERVE: -#if 0 testutil_check(cursor->reserve(cursor)); -#endif break; case UPDATE: testutil_check(cursor->update(cursor)); diff --git a/test/format/ops.c b/test/format/ops.c index c3472fd28c3..6e3e3b783c5 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -30,6 +30,7 @@ static int col_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool); +static int col_reserve(WT_CURSOR *, uint64_t, bool); static int col_update( TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static int nextprev(WT_CURSOR *, int); @@ -37,6 +38,7 @@ static void *ops(void *); static int row_insert( TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool); +static int row_reserve(WT_CURSOR *, WT_ITEM *, uint64_t, bool); static int row_update( TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static void table_append_init(void); @@ -636,7 +638,7 @@ skip_checkpoint: /* Pick the next checkpoint operation. */ testutil_assert(ret == WT_NOTFOUND); } } -#if 0 + /* Optionally reserve a row. */ if (!readonly && intxn && mmrand(&tinfo->rnd, 0, 20) == 1) { switch (g.type) { @@ -659,7 +661,7 @@ skip_checkpoint: /* Pick the next checkpoint operation. */ testutil_assert(ret == WT_NOTFOUND); } } -#endif + /* Perform the operation. */ switch (op) { case INSERT: @@ -1103,7 +1105,6 @@ nextprev(WT_CURSOR *cursor, int next) return (ret); } -#if 0 /* * row_reserve -- * Reserve a row in a row-store file. 
@@ -1166,7 +1167,6 @@ col_reserve(WT_CURSOR *cursor, uint64_t keyno, bool positioned) } return (0); } -#endif /* * row_update -- diff --git a/test/suite/test_reserve.py b/test/suite/test_reserve.py new file mode 100644 index 00000000000..23159ed7f8a --- /dev/null +++ b/test/suite/test_reserve.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# test_reserve.py +# Reserve update tests. + +import wiredtiger, wttest +from wtdataset import SimpleDataSet, SimpleIndexDataSet +from wtdataset import SimpleLSMDataSet, ComplexDataSet, ComplexLSMDataSet +from wtscenario import make_scenarios + +# Test WT_CURSOR.reserve. 
+class test_reserve(wttest.WiredTigerTestCase): + + keyfmt = [ + ('integer', dict(keyfmt='i')), + ('recno', dict(keyfmt='r')), + ('string', dict(keyfmt='S')), + ] + types = [ + ('file', dict(uri='file', ds=SimpleDataSet)), + ('lsm', dict(uri='lsm', ds=SimpleDataSet)), + ('table-complex', dict(uri='table', ds=ComplexDataSet)), + ('table-complex-lsm', dict(uri='table', ds=ComplexLSMDataSet)), + ('table-index', dict(uri='table', ds=SimpleIndexDataSet)), + ('table-simple', dict(uri='table', ds=SimpleDataSet)), + ('table-simple-lsm', dict(uri='table', ds=SimpleLSMDataSet)), + ] + scenarios = make_scenarios(types, keyfmt) + + def skip(self): + return self.keyfmt == 'r' and \ + (self.ds.is_lsm() or self.uri == 'lsm') + + def test_reserve(self): + if self.skip(): + return + + uri = self.uri + ':test_reserve' + + ds = self.ds(self, uri, 500, key_format=self.keyfmt) + ds.populate() + s = self.conn.open_session() + c = s.open_cursor(uri, None) + + # Repeatedly update a record. + for i in range(1, 5): + s.begin_transaction('isolation=snapshot') + c.set_key(ds.key(100)) + c.set_value(ds.value(100)) + self.assertEquals(c.update(), 0) + s.commit_transaction() + + # Confirm reserve fails if the record doesn't exist. + s.begin_transaction('isolation=snapshot') + c.set_key(ds.key(600)) + self.assertRaises(wiredtiger.WiredTigerError, lambda:c.reserve()) + s.rollback_transaction() + + # Repeatedly reserve a record and commit. + for i in range(1, 5): + s.begin_transaction('isolation=snapshot') + c.set_key(ds.key(100)) + self.assertEquals(c.reserve(), 0) + s.commit_transaction() + + # Repeatedly reserve a record and rollback. + for i in range(1, 5): + s.begin_transaction('isolation=snapshot') + c.set_key(ds.key(100)) + self.assertEquals(c.reserve(), 0) + s.rollback_transaction() + + # Repeatedly reserve, then update, a record, and commit. 
+ for i in range(1, 5): + s.begin_transaction('isolation=snapshot') + c.set_key(ds.key(100)) + self.assertEquals(c.reserve(), 0) + c.set_value(ds.value(100)) + self.assertEquals(c.update(), 0) + s.commit_transaction() + + # Repeatedly reserve, then update, a record, and rollback. + for i in range(1, 5): + s.begin_transaction('isolation=snapshot') + c.set_key(ds.key(100)) + self.assertEquals(c.reserve(), 0) + c.set_value(ds.value(100)) + self.assertEquals(c.update(), 0) + s.commit_transaction() + + # Reserve a slot, repeatedly try and update a record from another + # transaction (which should fail), repeatedly update a record and + # commit. + s2 = self.conn.open_session() + c2 = s2.open_cursor(uri, None) + for i in range(1, 2): + s.begin_transaction('isolation=snapshot') + c.set_key(ds.key(100)) + self.assertEquals(c.reserve(), 0) + + s2.begin_transaction('isolation=snapshot') + c2.set_key(ds.key(100)) + c2.set_value(ds.value(100)) + self.assertRaises(wiredtiger.WiredTigerError, lambda:c2.update()) + s2.rollback_transaction() + + c.set_key(ds.key(100)) + c.set_value(ds.value(100)) + self.assertEquals(c.update(), 0) + s.commit_transaction() + + # Test cursor.reserve will fail if a key has not yet been set. + def test_reserve_without_key(self): + if self.skip(): + return + + uri = self.uri + ':test_reserve_without_key' + + ds = self.ds(self, uri, 10, key_format=self.keyfmt) + ds.populate() + s = self.conn.open_session() + c = s.open_cursor(uri, None) + s.begin_transaction('isolation=snapshot') + msg = "/requires key be set/" + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, lambda:c.reserve(), msg) + + # Test cursor.reserve will fail if there's no running transaction. 
+ def test_reserve_without_txn(self): + if self.skip(): + return + + uri = self.uri + ':test_reserve_without_txn' + + ds = self.ds(self, uri, 10, key_format=self.keyfmt) + ds.populate() + s = self.conn.open_session() + c = s.open_cursor(uri, None) + c.set_key(ds.key(5)) + msg = "/only permitted in a running transaction/" + self.assertRaisesWithMessage( + wiredtiger.WiredTigerError, lambda:c.reserve(), msg) + + # Test cursor.reserve returns a value on success. + def test_reserve_returns_value(self): + if self.skip(): + return + + uri = self.uri + ':test_reserve_returns_value' + + ds = self.ds(self, uri, 10, key_format=self.keyfmt) + ds.populate() + s = self.conn.open_session() + c = s.open_cursor(uri, None) + s.begin_transaction('isolation=snapshot') + c.set_key(ds.key(5)) + self.assertEquals(c.reserve(), 0) + self.assertEqual(c.get_value(), ds.comparable_value(5)) + + # Test cursor.reserve fails on non-standard cursors. + def test_reserve_not_supported(self): + if self.skip(): + return + + uri = self.uri + ':test_reserve_not_supported' + s = self.conn.open_session() + s.create(uri, 'key_format=' + self.keyfmt + ",value_format=S") + + list = [ "bulk", "dump=json" ] + for l in list: + c = s.open_cursor(uri, None, l) + msg = "/Operation not supported/" + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda:self.assertEquals(c.reserve(), 0), msg) + c.close() + + list = [ "backup:", "config:" "log:" "metadata:" "statistics:" ] + for l in list: + c = s.open_cursor(l, None, None) + msg = "/Operation not supported/" + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda:self.assertEquals(c.reserve(), 0), msg) + +if __name__ == '__main__': + wttest.run() diff --git a/test/suite/wtdataset.py b/test/suite/wtdataset.py index 9adbf954baa..88f73021639 100644 --- a/test/suite/wtdataset.py +++ b/test/suite/wtdataset.py @@ -119,6 +119,11 @@ class SimpleDataSet(BaseDataSet): def __init__(self, testcase, uri, rows, **kwargs): super(SimpleDataSet, 
self).__init__(testcase, uri, rows, **kwargs) + # A value suitable for checking the value returned by a cursor. + def comparable_value(self, i): + return BaseDataSet.value_by_format(i, self.value_format) + + # A value suitable for assigning to a cursor. def value(self, i): return BaseDataSet.value_by_format(i, self.value_format) @@ -260,9 +265,8 @@ class ComplexDataSet(BaseDataSet): str(i) + ': abcdefghijklmnopqrstuvwxyz'[0:i%23], str(i) + ': abcdefghijklmnopqrstuvwxyz'[0:i%18]] - # A value suitable for assigning to a cursor, as - # cursor.set_value() expects a tuple when there it is used with - # a single argument and the value is composite. + # A value suitable for assigning to a cursor, as cursor.set_value() expects + # a tuple when it is used with a single argument and the value is composite. def value(self, i): return tuple(self.comparable_value(i)) -- cgit v1.2.1 From 45759b71aacc73b71cd8741fc5c46f34a5332f7e Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 13 Apr 2017 00:23:19 -0400 Subject: WT-3278 log the row-store cursor key instead of page key (#3383) --- src/include/btree.i | 27 ------------------ src/txn/txn_log.c | 79 +++++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 62 insertions(+), 44 deletions(-) diff --git a/src/include/btree.i b/src/include/btree.i index e89c7809a79..474b40bf805 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1022,33 +1022,6 @@ __wt_row_leaf_key(WT_SESSION_IMPL *session, return (__wt_row_leaf_key_work(session, page, rip, key, instantiate)); } -/* - * __wt_cursor_row_leaf_key -- - * Set a buffer to reference a cursor-referenced row-store leaf page key. - */ -static inline int -__wt_cursor_row_leaf_key(WT_CURSOR_BTREE *cbt, WT_ITEM *key) -{ - WT_PAGE *page; - WT_ROW *rip; - WT_SESSION_IMPL *session; - - /* - * If the cursor references a WT_INSERT item, take the key from there, - * else take the key from the original page. 
- */ - if (cbt->ins == NULL) { - session = (WT_SESSION_IMPL *)cbt->iface.session; - page = cbt->ref->page; - rip = &page->pg_row[cbt->slot]; - WT_RET(__wt_row_leaf_key(session, page, rip, key, false)); - } else { - key->data = WT_INSERT_KEY(cbt->ins); - key->size = WT_INSERT_KEY_SIZE(cbt->ins); - } - return (0); -} - /* * __wt_row_leaf_value_cell -- * Return a pointer to the value cell for a row-store leaf page key, or diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c index 72129692138..67de19abe4f 100644 --- a/src/txn/txn_log.c +++ b/src/txn/txn_log.c @@ -13,6 +13,51 @@ typedef struct { uint32_t flags; } WT_TXN_PRINTLOG_ARGS; +#ifdef HAVE_DIAGNOSTIC +/* + * __txn_op_log_row_key_check -- + * Confirm the cursor references the correct key. + */ +static void +__txn_op_log_row_key_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) +{ + WT_CURSOR *cursor; + WT_ITEM key; + WT_PAGE *page; + WT_ROW *rip; + + cursor = &cbt->iface; + WT_ASSERT(session, F_ISSET(cursor, WT_CURSTD_KEY_SET)); + + memset(&key, 0, sizeof(key)); + + /* + * We used to take the key for row-store logging from the page + * referenced by the cursor, when we switched to taking it from the + * cursor itself. Check that they are the same. + * + * If the cursor references a WT_INSERT item, take the key from there, + * else take the key from the original page. + */ + if (cbt->ins == NULL) { + session = (WT_SESSION_IMPL *)cbt->iface.session; + page = cbt->ref->page; + rip = &page->pg_row[cbt->slot]; + WT_ASSERT(session, + __wt_row_leaf_key(session, page, rip, &key, false) == 0); + } else { + key.data = WT_INSERT_KEY(cbt->ins); + key.size = WT_INSERT_KEY_SIZE(cbt->ins); + } + + WT_ASSERT(session, + key.size == cursor->key.size && + memcmp(key.data, cursor->key.data, key.size) == 0); + + __wt_buf_free(session, &key); +} +#endif + /* * __txn_op_log -- * Log an operation for the current transaction. 
@@ -21,46 +66,46 @@ static int __txn_op_log(WT_SESSION_IMPL *session, WT_ITEM *logrec, WT_TXN_OP *op, WT_CURSOR_BTREE *cbt) { - WT_DECL_RET; - WT_ITEM key, value; + WT_CURSOR *cursor; + WT_ITEM value; WT_UPDATE *upd; uint64_t recno; - WT_CLEAR(key); + cursor = &cbt->iface; + upd = op->u.upd; value.data = WT_UPDATE_DATA(upd); value.size = upd->size; - /* We shouldn't be logging reserve operations. */ - WT_ASSERT(session, !WT_UPDATE_RESERVED_ISSET(upd)); - /* * Log the operation. It must be a row- or column-store insert, remove - * or update, all of which require log records. + * or update, all of which require log records. We shouldn't ever log + * reserve operations. */ + WT_ASSERT(session, !WT_UPDATE_RESERVED_ISSET(upd)); if (cbt->btree->type == BTREE_ROW) { - WT_ERR(__wt_cursor_row_leaf_key(cbt, &key)); - +#ifdef HAVE_DIAGNOSTIC + __txn_op_log_row_key_check(session, cbt); +#endif if (WT_UPDATE_DELETED_ISSET(upd)) - WT_ERR(__wt_logop_row_remove_pack( - session, logrec, op->fileid, &key)); + WT_RET(__wt_logop_row_remove_pack( + session, logrec, op->fileid, &cursor->key)); else - WT_ERR(__wt_logop_row_put_pack( - session, logrec, op->fileid, &key, &value)); + WT_RET(__wt_logop_row_put_pack( + session, logrec, op->fileid, &cursor->key, &value)); } else { recno = WT_INSERT_RECNO(cbt->ins); WT_ASSERT(session, recno != WT_RECNO_OOB); if (WT_UPDATE_DELETED_ISSET(upd)) - WT_ERR(__wt_logop_col_remove_pack( + WT_RET(__wt_logop_col_remove_pack( session, logrec, op->fileid, recno)); else - WT_ERR(__wt_logop_col_put_pack( + WT_RET(__wt_logop_col_put_pack( session, logrec, op->fileid, recno, &value)); } -err: __wt_buf_free(session, &key); - return (ret); + return (0); } /* -- cgit v1.2.1 From 787c625446989be6745e069d7b427f2370d2ddda Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 13 Apr 2017 00:59:39 -0400 Subject: WT-3261 add a checkpoint epoch to avoid draining the eviction queue (#3370) --- src/btree/bt_read.c | 7 +------ src/btree/bt_sync.c | 22 
++++------------------ src/evict/evict_page.c | 12 +++++++----- src/include/btree.h | 27 +++++++++++++-------------- src/include/btree.i | 20 ++++++++++---------- src/include/extern.h | 2 ++ src/include/session.h | 9 +++++---- src/support/generation.c | 20 ++++++++++++-------- 8 files changed, 54 insertions(+), 65 deletions(-) diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index fb69afb166c..72a69e8591c 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -585,15 +585,10 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags * if the page qualifies for forced eviction and update * the page's generation number. If eviction isn't being * done on this file, we're done. - * In-memory split of large pages is allowed while - * no_eviction is set on btree, whereas reconciliation - * is not allowed. */ if (LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(session, WT_SESSION_NO_EVICTION) || - btree->lsm_primary || - (btree->evict_disabled > 0 && - !F_ISSET(btree, WT_BTREE_ALLOW_SPLITS))) + btree->evict_disabled > 0 || btree->lsm_primary) goto skip_evict; /* diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index 81e9d1757bb..112f0725f94 100644 --- a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -180,21 +180,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) * any problematic eviction or page splits to complete. */ WT_PUBLISH(btree->checkpointing, WT_CKPT_PREPARE); - - /* - * Sync for checkpoint allows splits to happen while the queue - * is being drained, but not reconciliation. We need to do this, - * since draining the queue can take long enough for hot pages - * to grow significantly larger than the configured maximum - * size. 
- */ - F_SET(btree, WT_BTREE_ALLOW_SPLITS); - ret = __wt_evict_file_exclusive_on(session); - F_CLR(btree, WT_BTREE_ALLOW_SPLITS); - WT_ERR(ret); - __wt_evict_file_exclusive_off(session); - - WT_PUBLISH(btree->checkpointing, WT_CKPT_RUNNING); + (void)__wt_gen_next_drain(session, WT_GEN_EVICT); + btree->checkpointing = WT_CKPT_RUNNING; /* Write all dirty in-cache pages. */ flags |= WT_READ_NO_EVICT; @@ -268,9 +255,8 @@ err: /* On error, clear any left-over tree walk. */ saved_pinned_id == WT_TXN_NONE) __wt_txn_release_snapshot(session); - /* Clear the checkpoint flag and push the change. */ - if (btree->checkpointing != WT_CKPT_OFF) - WT_PUBLISH(btree->checkpointing, WT_CKPT_OFF); + /* Clear the checkpoint flag. */ + btree->checkpointing = WT_CKPT_OFF; __wt_spin_unlock(session, &btree->flush_lock); diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index 9498e2fb313..edcd108e7e4 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -113,6 +113,9 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) /* Checkpoints should never do eviction. */ WT_ASSERT(session, !WT_SESSION_IS_CHECKPOINT(session)); + /* Enter the eviction generation. */ + __wt_session_gen_enter(session, WT_GEN_EVICT); + page = ref->page; tree_dead = F_ISSET(session->dhandle, WT_DHANDLE_DEAD); @@ -133,7 +136,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) * we want: there is nothing more to do. */ if (LF_ISSET(WT_EVICT_INMEM_SPLIT)) - return (0); + goto done; /* Count evictions of internal pages during normal operation. */ if (!closing && WT_PAGE_IS_INTERNAL(page)) { @@ -182,6 +185,9 @@ err: if (!closing) WT_STAT_DATA_INCR(session, cache_eviction_fail); } +done: /* Leave the eviction generation. */ + __wt_session_gen_leave(session, WT_GEN_EVICT); + return (ret); } @@ -479,10 +485,6 @@ __evict_review( */ if (LF_ISSET(WT_EVICT_INMEM_SPLIT)) return (__wt_split_insert(session, ref)); - - /* If splits are the only permitted operation, we're done. 
*/ - if (F_ISSET(S2BT(session), WT_BTREE_ALLOW_SPLITS)) - return (EBUSY); } /* If the page is clean, we're done and we can evict. */ diff --git a/src/include/btree.h b/src/include/btree.h index 19db27d84a2..8ce77b5ecd3 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -151,7 +151,7 @@ struct __wt_btree { volatile uint32_t evict_busy; /* Count of threads in eviction */ int evict_start_type; /* Start position for eviction walk (see WT_EVICT_WALK_START). */ - enum { + volatile enum { WT_CKPT_OFF, WT_CKPT_PREPARE, WT_CKPT_RUNNING } checkpointing; /* Checkpoint in progress */ @@ -163,19 +163,18 @@ struct __wt_btree { WT_SPINLOCK flush_lock; /* Lock to flush the tree's pages */ /* Flags values up to 0xff are reserved for WT_DHANDLE_* */ -#define WT_BTREE_ALLOW_SPLITS 0x000100 /* Allow splits, even with no evict */ -#define WT_BTREE_BULK 0x000200 /* Bulk-load handle */ -#define WT_BTREE_CLOSED 0x000400 /* Handle closed */ -#define WT_BTREE_IGNORE_CACHE 0x000800 /* Cache-resident object */ -#define WT_BTREE_IN_MEMORY 0x001000 /* Cache-resident object */ -#define WT_BTREE_LOOKASIDE 0x002000 /* Look-aside table */ -#define WT_BTREE_NO_CHECKPOINT 0x004000 /* Disable checkpoints */ -#define WT_BTREE_NO_LOGGING 0x008000 /* Disable logging */ -#define WT_BTREE_REBALANCE 0x020000 /* Handle is for rebalance */ -#define WT_BTREE_SALVAGE 0x040000 /* Handle is for salvage */ -#define WT_BTREE_SKIP_CKPT 0x080000 /* Handle skipped checkpoint */ -#define WT_BTREE_UPGRADE 0x100000 /* Handle is for upgrade */ -#define WT_BTREE_VERIFY 0x200000 /* Handle is for verify */ +#define WT_BTREE_BULK 0x000100 /* Bulk-load handle */ +#define WT_BTREE_CLOSED 0x000200 /* Handle closed */ +#define WT_BTREE_IGNORE_CACHE 0x000400 /* Cache-resident object */ +#define WT_BTREE_IN_MEMORY 0x000800 /* Cache-resident object */ +#define WT_BTREE_LOOKASIDE 0x001000 /* Look-aside table */ +#define WT_BTREE_NO_CHECKPOINT 0x002000 /* Disable checkpoints */ +#define WT_BTREE_NO_LOGGING 0x004000 /* 
Disable logging */ +#define WT_BTREE_REBALANCE 0x008000 /* Handle is for rebalance */ +#define WT_BTREE_SALVAGE 0x010000 /* Handle is for salvage */ +#define WT_BTREE_SKIP_CKPT 0x020000 /* Handle skipped checkpoint */ +#define WT_BTREE_UPGRADE 0x040000 /* Handle is for upgrade */ +#define WT_BTREE_VERIFY 0x080000 /* Handle is for verify */ uint32_t flags; }; diff --git a/src/include/btree.i b/src/include/btree.i index 474b40bf805..d4db65b2033 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1285,6 +1285,16 @@ __wt_page_can_evict( if (mod == NULL) return (true); + /* + * We can't split or evict multiblock row-store pages where the parent's + * key for the page is an overflow item, because the split into the + * parent frees the backing blocks for any no-longer-used overflow keys, + * which will corrupt the checkpoint's block management. + */ + if (btree->checkpointing != WT_CKPT_OFF && + F_ISSET_ATOMIC(ref->home, WT_PAGE_OVERFLOW_KEYS)) + return (false); + /* * Check for in-memory splits before other eviction tests. If the page * should split in-memory, return success immediately and skip more @@ -1311,16 +1321,6 @@ __wt_page_can_evict( return (false); } - /* - * We can't evict clean, multiblock row-store pages where the parent's - * key for the page is an overflow item, because the split into the - * parent frees the backing blocks for any no-longer-used overflow keys, - * which will corrupt the checkpoint's block management. 
- */ - if (btree->checkpointing != WT_CKPT_OFF && - F_ISSET_ATOMIC(ref->home, WT_PAGE_OVERFLOW_KEYS)) - return (false); - /* * If a split created new internal pages, those newly created internal * pages cannot be evicted until all threads are known to have exited diff --git a/src/include/extern.h b/src/include/extern.h index ff835bacc56..4f38b7ac433 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -649,6 +649,8 @@ extern int __wt_unexpected_object_type( WT_SESSION_IMPL *session, const char *ur extern void __wt_gen_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern uint64_t __wt_gen(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern uint64_t __wt_gen_next(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern uint64_t __wt_gen_next_drain(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_gen_drain(WT_SESSION_IMPL *session, int which, uint64_t generation) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern uint64_t __wt_gen_oldest(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern uint64_t __wt_session_gen(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_session_gen_enter(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/session.h b/src/include/session.h index adef5e39068..de2c1463684 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -169,10 +169,11 @@ struct __wt_session_impl { /* Generations manager */ #define WT_GEN_CHECKPOINT 0 /* Checkpoint generation */ -#define WT_GEN_HAZARD 1 /* Hazard pointer */ -#define WT_GEN_SCHEMA 2 /* Schema version */ -#define WT_GEN_SPLIT 3 /* Page splits */ -#define WT_GENERATIONS 4 /* Total generation manager entries */ +#define WT_GEN_EVICT 1 /* 
Eviction generation */ +#define WT_GEN_HAZARD 2 /* Hazard pointer */ +#define WT_GEN_SCHEMA 3 /* Schema version */ +#define WT_GEN_SPLIT 4 /* Page splits */ +#define WT_GENERATIONS 5 /* Total generation manager entries */ volatile uint64_t generations[WT_GENERATIONS]; /* diff --git a/src/support/generation.c b/src/support/generation.c index ed615d4c7cd..6e16d7e57fe 100644 --- a/src/support/generation.c +++ b/src/support/generation.c @@ -57,14 +57,12 @@ __wt_gen_next(WT_SESSION_IMPL *session, int which) return (__wt_atomic_addv64(&S2C(session)->generations[which], 1)); } -#if 0 /* * __wt_gen_next_drain -- * Switch the resource to its next generation, then wait for it to drain. */ uint64_t - TABBED IN to avoid dist/ functions: - __wt_gen_next_drain(WT_SESSION_IMPL *session, int which) +__wt_gen_next_drain(WT_SESSION_IMPL *session, int which) { uint64_t v; @@ -80,8 +78,7 @@ uint64_t * Wait for the resource to drain. */ void - TABBED IN to avoid dist/ functions: - __wt_gen_drain(WT_SESSION_IMPL *session, int which, uint64_t generation) +__wt_gen_drain(WT_SESSION_IMPL *session, int which, uint64_t generation) { WT_CONNECTION_IMPL *conn; WT_SESSION_IMPL *s; @@ -109,7 +106,14 @@ void /* Ensure we only read the value once. */ WT_ORDERED_READ(v, s->generations[which]); - if (v == 0 || generation <= v) + /* + * The generation argument is newer than the limit. Wait + * for threads in generations older than the argument + * generation, threads in argument generations are OK. + * + * The thread's generation may be 0 (that is, not set). + */ + if (v == 0 || v >= generation) break; /* @@ -124,7 +128,6 @@ void } } } -#endif /* * __wt_gen_oldest -- @@ -156,6 +159,7 @@ __wt_gen_oldest(WT_SESSION_IMPL *session, int which) /* Ensure we only read the value once. 
*/ WT_ORDERED_READ(v, s->generations[which]); + if (v != 0 && v < oldest) oldest = v; } @@ -328,7 +332,7 @@ __wt_stash_discard_all(WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session) /* * This function is called during WT_CONNECTION.close to discard any - * memory that remains. For that reason, we take two WT_SESSION_IMPL + * memory that remains. For that reason, we take two WT_SESSION_IMPL * arguments: session_safe is still linked to the WT_CONNECTION and * can be safely used for calls to other WiredTiger functions, while * session is the WT_SESSION_IMPL we're cleaning up. -- cgit v1.2.1 From 14288b438654483e3caf49b4af4adaa1b8258f49 Mon Sep 17 00:00:00 2001 From: David Hows Date: Thu, 13 Apr 2017 15:24:47 +1000 Subject: WT-3041 Remove test_perf001 (#3385) --- test/suite/test_perf001.py | 70 ---------------------------------------------- 1 file changed, 70 deletions(-) delete mode 100644 test/suite/test_perf001.py diff --git a/test/suite/test_perf001.py b/test/suite/test_perf001.py deleted file mode 100644 index 4ab958996f0..00000000000 --- a/test/suite/test_perf001.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-2017 MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# test_perf001.py -# Test performance when inserting into a table with an index. - -import wiredtiger, wttest -import random -from time import clock, time -from wtscenario import make_scenarios - -# Test performance of inserting into a table with an index. -class test_perf001(wttest.WiredTigerTestCase): - table_name = 'test_perf001' - - scenarios = make_scenarios([ - #('file-file', dict(tabletype='file',indextype='file')), - ('file-lsm', dict(tabletype='file',indextype='lsm', cfg='', - conn_config="statistics=(fast),statistics_log=(wait=1)")), - #('lsm-file', dict(tabletype='lsm',indextype='file')), - #('lsm-lsm', dict(tabletype='lsm',indextype='lsm')), - ]) - conn_config = 'cache_size=512M' - - def test_performance_of_indices(self): - uri = 'table:' + self.table_name - create_args = 'key_format=i,value_format=ii,columns=(a,c,d),type=' + self.tabletype - self.session.create(uri, create_args) - self.session.create('index:' + self.table_name + ':ia', - 'columns=(d,c),type=' + self.indextype) - - c = self.session.open_cursor('table:' + self.table_name, None, None) - start_time = clock() - for i in xrange(750000): - # 100 operations should never take 5 seconds, sometimes they take - # 2 seconds when a page is being force-evicted. 
- if i % 100 == 0 and i != 0: - end_time = clock() - self.assertTrue(end_time - start_time < 5) - start_time = end_time - c[i] = (int(time()), random.randint(1,5)) - c.close() - -if __name__ == '__main__': - wttest.run() -- cgit v1.2.1 From a07222c50ef9b07a9265aaa22573b41a5bb7356b Mon Sep 17 00:00:00 2001 From: sueloverso Date: Thu, 13 Apr 2017 15:33:32 -0400 Subject: WT-3222 Review and enhance log statistics (#3369) Including removing a few redundant statistics, adding some new statistics, and improving the descriptions of others. --- dist/stat_data.py | 20 +++-- src/docs/upgrading.dox | 9 ++ src/include/stat.h | 20 +++-- src/include/wiredtiger.in | 220 ++++++++++++++++++++++++--------------------- src/log/log_slot.c | 58 +++++++++--- src/support/stat.c | 65 +++++++++----- tools/wtstats/stat_data.py | 2 + 7 files changed, 240 insertions(+), 154 deletions(-) diff --git a/dist/stat_data.py b/dist/stat_data.py index ac79ffd029a..9db0b3b0e80 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -324,16 +324,22 @@ connection_stats = [ LogStat('log_scan_records', 'records processed by log scan'), LogStat('log_scan_rereads', 'log scan records requiring two reads'), LogStat('log_scans', 'log scan operations'), - LogStat('log_slot_active_closed', 'consolidated slot join active slot closed'), - LogStat('log_slot_closes', 'consolidated slot closures'), + LogStat('log_slot_active_closed', 'slot join found active slot closed'), + LogStat('log_slot_close_race', 'slot close lost race'), + LogStat('log_slot_close_unbuf', 'slot close unbuffered waits'), + LogStat('log_slot_closes', 'slot closures'), LogStat('log_slot_coalesced', 'written slots coalesced'), LogStat('log_slot_consolidated', 'logging bytes consolidated', 'size'), - LogStat('log_slot_joins', 'consolidated slot joins'), - LogStat('log_slot_no_free_slots', 'consolidated slot transitions unable to find free slot'), - LogStat('log_slot_races', 'consolidated slot join races'), + LogStat('log_slot_immediate', 'slot join 
calls did not yield'), + LogStat('log_slot_no_free_slots', 'slot transitions unable to find free slot'), + LogStat('log_slot_races', 'slot join atomic update races'), LogStat('log_slot_switch_busy', 'busy returns attempting to switch slots'), - LogStat('log_slot_transitions', 'consolidated slot join transitions'), - LogStat('log_slot_unbuffered', 'consolidated slot unbuffered writes'), + LogStat('log_slot_unbuffered', 'slot unbuffered writes'), + LogStat('log_slot_yield', 'slot join calls yielded'), + LogStat('log_slot_yield_close', 'slot join calls found active slot closed'), + LogStat('log_slot_yield_duration', 'slot joins yield time (usecs)', 'no_clear,no_scale'), + LogStat('log_slot_yield_race', 'slot join calls atomic updates raced'), + LogStat('log_slot_yield_sleep', 'slot join calls slept'), LogStat('log_sync', 'log sync operations'), LogStat('log_sync_dir', 'log sync_dir operations'), LogStat('log_sync_dir_duration', 'log sync_dir time duration (usecs)', 'no_clear,no_scale'), diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index e5fce3d0d5d..53eb287d9a6 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -16,6 +16,15 @@ have switched that lock from a spin lock to a read-write lock, and consequently changed the statistics tracking lock related wait time.
+
Logging subsystem statistics
+
+Two logging subsystem statistics have been removed as they were duplicates of +other statistics. The \c log_slot_joins and \c log_slot_transitions statistics +are no longer present. They were duplicates of \c log_writes and +\c log_slot_closes, respectively. Several new logging-related statistics have +been added. +
+
Forced and named checkpoint error conditions changed
There are new cases where checkpoints created with an explicit name or the diff --git a/src/include/stat.h b/src/include/stat.h index 5603e404e13..a537940d075 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -404,13 +404,6 @@ struct __wt_connection_stats { int64_t lock_table_wait_application; int64_t lock_table_wait_internal; int64_t log_slot_switch_busy; - int64_t log_slot_closes; - int64_t log_slot_active_closed; - int64_t log_slot_races; - int64_t log_slot_transitions; - int64_t log_slot_joins; - int64_t log_slot_no_free_slots; - int64_t log_slot_unbuffered; int64_t log_bytes_payload; int64_t log_bytes_written; int64_t log_zero_fills; @@ -437,6 +430,19 @@ struct __wt_connection_stats { int64_t log_prealloc_files; int64_t log_prealloc_used; int64_t log_scan_records; + int64_t log_slot_close_race; + int64_t log_slot_close_unbuf; + int64_t log_slot_closes; + int64_t log_slot_races; + int64_t log_slot_yield_race; + int64_t log_slot_immediate; + int64_t log_slot_yield_close; + int64_t log_slot_yield_sleep; + int64_t log_slot_yield; + int64_t log_slot_active_closed; + int64_t log_slot_yield_duration; + int64_t log_slot_no_free_slots; + int64_t log_slot_unbuffered; int64_t log_compress_mem; int64_t log_buffer_size; int64_t log_compress_len; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 31b1c16cd6c..20db139ff8e 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -4655,212 +4655,224 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1145 /*! log: busy returns attempting to switch slots */ #define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1146 -/*! log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1147 -/*! log: consolidated slot join active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1148 -/*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1149 -/*! 
log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1150 -/*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1151 -/*! log: consolidated slot transitions unable to find free slot */ -#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1152 -/*! log: consolidated slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1153 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1154 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1147 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1155 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1148 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1156 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1149 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1157 +#define WT_STAT_CONN_LOG_FLUSH 1150 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1158 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1151 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1159 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1152 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1160 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1153 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1161 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1154 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1162 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1155 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1163 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1156 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1164 +#define WT_STAT_CONN_LOG_SCANS 1157 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1165 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1158 /*! 
log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1166 +#define WT_STAT_CONN_LOG_WRITE_LSN 1159 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1167 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1160 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1168 +#define WT_STAT_CONN_LOG_SYNC 1161 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1169 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1162 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1170 +#define WT_STAT_CONN_LOG_SYNC_DIR 1163 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1171 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1164 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1172 +#define WT_STAT_CONN_LOG_WRITES 1165 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1173 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1166 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1174 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1167 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1175 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1168 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1176 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1169 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1177 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1170 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1178 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1171 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1179 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1172 +/*! log: slot close lost race */ +#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1173 +/*! 
log: slot close unbuffered waits */ +#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1174 +/*! log: slot closures */ +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1175 +/*! log: slot join atomic update races */ +#define WT_STAT_CONN_LOG_SLOT_RACES 1176 +/*! log: slot join calls atomic updates raced */ +#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1177 +/*! log: slot join calls did not yield */ +#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1178 +/*! log: slot join calls found active slot closed */ +#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1179 +/*! log: slot join calls slept */ +#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1180 +/*! log: slot join calls yielded */ +#define WT_STAT_CONN_LOG_SLOT_YIELD 1181 +/*! log: slot join found active slot closed */ +#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1182 +/*! log: slot joins yield time (usecs) */ +#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1183 +/*! log: slot transitions unable to find free slot */ +#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1184 +/*! log: slot unbuffered writes */ +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1185 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1180 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1186 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1181 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1187 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1182 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1188 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1183 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1189 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1184 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1190 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1185 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1191 /*! 
reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1186 +#define WT_STAT_CONN_REC_PAGES 1192 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1187 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1193 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1188 +#define WT_STAT_CONN_REC_PAGE_DELETE 1194 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1189 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1195 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1190 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1196 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1191 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1197 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1192 +#define WT_STAT_CONN_SESSION_OPEN 1198 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1193 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1199 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1194 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1200 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1195 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1201 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1196 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1202 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1197 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1203 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1198 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1204 /*! 
session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1199 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1205 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1200 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1206 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1201 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1207 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1202 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1208 /*! session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1203 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1209 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1204 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1210 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1205 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1211 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1206 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1212 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1207 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1213 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1208 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1214 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1209 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1215 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1210 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1216 /*! 
session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1211 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1217 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1212 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1218 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1213 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1219 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1214 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1220 /*! thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1215 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1221 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1216 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1222 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1217 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1223 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1218 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1224 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1219 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1225 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1220 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1226 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1221 +#define WT_STAT_CONN_PAGE_SLEEP 1227 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1222 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1228 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1223 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1229 /*! 
transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1224 +#define WT_STAT_CONN_TXN_BEGIN 1230 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1225 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1231 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1226 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1232 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1227 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1233 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1228 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1234 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1229 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1235 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1230 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1236 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1231 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1237 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1232 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1238 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1233 +#define WT_STAT_CONN_TXN_CHECKPOINT 1239 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1234 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1240 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1235 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1241 /*! 
* transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1236 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1242 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1237 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1243 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1238 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1244 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1239 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1245 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1240 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1246 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1241 +#define WT_STAT_CONN_TXN_SYNC 1247 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1242 +#define WT_STAT_CONN_TXN_COMMIT 1248 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1243 +#define WT_STAT_CONN_TXN_ROLLBACK 1249 /*! * @} diff --git a/src/log/log_slot.c b/src/log/log_slot.c index c92929c91b7..a9655dd903e 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -126,13 +126,17 @@ retry: * processed by another closing thread. Only return 0 when we * actually closed the slot. */ - if (WT_LOG_SLOT_CLOSED(old_state)) + if (WT_LOG_SLOT_CLOSED(old_state)) { + WT_STAT_CONN_INCR(session, log_slot_close_race); return (WT_NOTFOUND); + } /* * If someone completely processed this slot, we're done. 
*/ - if (FLD64_ISSET((uint64_t)slot->slot_state, WT_LOG_SLOT_RESERVED)) + if (FLD64_ISSET((uint64_t)slot->slot_state, WT_LOG_SLOT_RESERVED)) { + WT_STAT_CONN_INCR(session, log_slot_close_race); return (WT_NOTFOUND); + } new_state = (old_state | WT_LOG_SLOT_CLOSE); /* * Close this slot. If we lose the race retry. @@ -161,6 +165,7 @@ retry: if (WT_LOG_SLOT_UNBUFFERED_ISSET(old_state)) { while (slot->slot_unbuffered == 0) { WT_RET(WT_SESSION_CHECK_PANIC(session)); + WT_STAT_CONN_INCR(session, log_slot_close_unbuf); __wt_yield(); #ifdef HAVE_DIAGNOSTIC ++count; @@ -250,8 +255,6 @@ __log_slot_new(WT_SESSION_IMPL *session) * We have a new, initialized slot to use. * Set it as the active slot. */ - WT_STAT_CONN_INCR(session, - log_slot_transitions); log->active_slot = slot; log->pool_index = pool_i; return (0); @@ -496,12 +499,14 @@ int __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot) { + struct timespec start, stop; WT_CONNECTION_IMPL *conn; WT_LOG *log; WT_LOGSLOT *slot; + uint64_t usecs; int64_t flag_state, new_state, old_state, released; - int32_t join_offset, new_join; - bool unbuffered, yld; + int32_t join_offset, new_join, wait_cnt; + bool closed, diag_yield, raced, slept, unbuffered, yielded; conn = S2C(session); log = conn->log; @@ -512,13 +517,15 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, /* * There should almost always be a slot open. */ - unbuffered = false; + unbuffered = yielded = false; + closed = raced = slept = false; + wait_cnt = 0; #ifdef HAVE_DIAGNOSTIC - yld = (++log->write_calls % 7) == 0; + diag_yield = (++log->write_calls % 7) == 0; if ((log->write_calls % WT_THOUSAND) == 0 || mysize > WT_LOG_SLOT_BUF_MAX) { #else - yld = false; + diag_yield = false; if (mysize > WT_LOG_SLOT_BUF_MAX) { #endif unbuffered = true; @@ -548,7 +555,7 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, /* * Braces used due to potential empty body warning. 
*/ - if (yld) { + if (diag_yield) { WT_DIAGNOSTIC_YIELD; } /* @@ -558,19 +565,44 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, &slot->slot_state, old_state, new_state)) break; WT_STAT_CONN_INCR(session, log_slot_races); - } else + raced = true; + } else { WT_STAT_CONN_INCR(session, log_slot_active_closed); + closed = true; + ++wait_cnt; + } + if (!yielded) + __wt_epoch(session, &start); + yielded = true; /* * The slot is no longer open or we lost the race to * update it. Yield and try again. */ - __wt_yield(); + if (wait_cnt < WT_THOUSAND) + __wt_yield(); + else { + __wt_sleep(0, WT_THOUSAND); + slept = true; + } } /* * We joined this slot. Fill in our information to return to * the caller. */ - WT_STAT_CONN_INCR(session, log_slot_joins); + if (!yielded) + WT_STAT_CONN_INCR(session, log_slot_immediate); + else { + WT_STAT_CONN_INCR(session, log_slot_yield); + __wt_epoch(session, &stop); + usecs = WT_TIMEDIFF_US(stop, start); + WT_STAT_CONN_INCRV(session, log_slot_yield_duration, usecs); + if (closed) + WT_STAT_CONN_INCR(session, log_slot_yield_close); + if (raced) + WT_STAT_CONN_INCR(session, log_slot_yield_race); + if (slept) + WT_STAT_CONN_INCR(session, log_slot_yield_sleep); + } if (LF_ISSET(WT_LOG_DSYNC | WT_LOG_FSYNC)) F_SET(slot, WT_SLOT_SYNC_DIR); if (LF_ISSET(WT_LOG_FLUSH)) diff --git a/src/support/stat.c b/src/support/stat.c index 2c2217f8c20..2b55ae37541 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -771,13 +771,6 @@ static const char * const __stats_connection_desc[] = { "lock: table lock application thread time waiting for the table lock (usecs)", "lock: table lock internal thread time waiting for the table lock (usecs)", "log: busy returns attempting to switch slots", - "log: consolidated slot closures", - "log: consolidated slot join active slot closed", - "log: consolidated slot join races", - "log: consolidated slot join transitions", - "log: consolidated slot joins", - "log: consolidated slot transitions unable to 
find free slot", - "log: consolidated slot unbuffered writes", "log: log bytes of payload data", "log: log bytes written", "log: log files manually zero-filled", @@ -804,6 +797,19 @@ static const char * const __stats_connection_desc[] = { "log: pre-allocated log files prepared", "log: pre-allocated log files used", "log: records processed by log scan", + "log: slot close lost race", + "log: slot close unbuffered waits", + "log: slot closures", + "log: slot join atomic update races", + "log: slot join calls atomic updates raced", + "log: slot join calls did not yield", + "log: slot join calls found active slot closed", + "log: slot join calls slept", + "log: slot join calls yielded", + "log: slot join found active slot closed", + "log: slot joins yield time (usecs)", + "log: slot transitions unable to find free slot", + "log: slot unbuffered writes", "log: total in-memory size of compressed records", "log: total log buffer size", "log: total size of compressed records", @@ -1057,13 +1063,6 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->lock_table_wait_application = 0; stats->lock_table_wait_internal = 0; stats->log_slot_switch_busy = 0; - stats->log_slot_closes = 0; - stats->log_slot_active_closed = 0; - stats->log_slot_races = 0; - stats->log_slot_transitions = 0; - stats->log_slot_joins = 0; - stats->log_slot_no_free_slots = 0; - stats->log_slot_unbuffered = 0; stats->log_bytes_payload = 0; stats->log_bytes_written = 0; stats->log_zero_fills = 0; @@ -1090,6 +1089,19 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->log_prealloc_files = 0; stats->log_prealloc_used = 0; stats->log_scan_records = 0; + stats->log_slot_close_race = 0; + stats->log_slot_close_unbuf = 0; + stats->log_slot_closes = 0; + stats->log_slot_races = 0; + stats->log_slot_yield_race = 0; + stats->log_slot_immediate = 0; + stats->log_slot_yield_close = 0; + stats->log_slot_yield_sleep = 0; + stats->log_slot_yield = 0; + stats->log_slot_active_closed = 
0; + /* not clearing log_slot_yield_duration */ + stats->log_slot_no_free_slots = 0; + stats->log_slot_unbuffered = 0; stats->log_compress_mem = 0; /* not clearing log_buffer_size */ stats->log_compress_len = 0; @@ -1373,15 +1385,6 @@ __wt_stat_connection_aggregate( to->lock_table_wait_internal += WT_STAT_READ(from, lock_table_wait_internal); to->log_slot_switch_busy += WT_STAT_READ(from, log_slot_switch_busy); - to->log_slot_closes += WT_STAT_READ(from, log_slot_closes); - to->log_slot_active_closed += - WT_STAT_READ(from, log_slot_active_closed); - to->log_slot_races += WT_STAT_READ(from, log_slot_races); - to->log_slot_transitions += WT_STAT_READ(from, log_slot_transitions); - to->log_slot_joins += WT_STAT_READ(from, log_slot_joins); - to->log_slot_no_free_slots += - WT_STAT_READ(from, log_slot_no_free_slots); - to->log_slot_unbuffered += WT_STAT_READ(from, log_slot_unbuffered); to->log_bytes_payload += WT_STAT_READ(from, log_bytes_payload); to->log_bytes_written += WT_STAT_READ(from, log_bytes_written); to->log_zero_fills += WT_STAT_READ(from, log_zero_fills); @@ -1412,6 +1415,22 @@ __wt_stat_connection_aggregate( to->log_prealloc_files += WT_STAT_READ(from, log_prealloc_files); to->log_prealloc_used += WT_STAT_READ(from, log_prealloc_used); to->log_scan_records += WT_STAT_READ(from, log_scan_records); + to->log_slot_close_race += WT_STAT_READ(from, log_slot_close_race); + to->log_slot_close_unbuf += WT_STAT_READ(from, log_slot_close_unbuf); + to->log_slot_closes += WT_STAT_READ(from, log_slot_closes); + to->log_slot_races += WT_STAT_READ(from, log_slot_races); + to->log_slot_yield_race += WT_STAT_READ(from, log_slot_yield_race); + to->log_slot_immediate += WT_STAT_READ(from, log_slot_immediate); + to->log_slot_yield_close += WT_STAT_READ(from, log_slot_yield_close); + to->log_slot_yield_sleep += WT_STAT_READ(from, log_slot_yield_sleep); + to->log_slot_yield += WT_STAT_READ(from, log_slot_yield); + to->log_slot_active_closed += + WT_STAT_READ(from, 
log_slot_active_closed); + to->log_slot_yield_duration += + WT_STAT_READ(from, log_slot_yield_duration); + to->log_slot_no_free_slots += + WT_STAT_READ(from, log_slot_no_free_slots); + to->log_slot_unbuffered += WT_STAT_READ(from, log_slot_unbuffered); to->log_compress_mem += WT_STAT_READ(from, log_compress_mem); to->log_buffer_size += WT_STAT_READ(from, log_buffer_size); to->log_compress_len += WT_STAT_READ(from, log_compress_len); diff --git a/tools/wtstats/stat_data.py b/tools/wtstats/stat_data.py index a94ce524ae3..09fca2b9525 100644 --- a/tools/wtstats/stat_data.py +++ b/tools/wtstats/stat_data.py @@ -26,6 +26,7 @@ no_scale_per_second_list = [ 'log: log sync_dir time duration (usecs)', 'log: maximum log file size', 'log: number of pre-allocated log files to create', + 'log: slot joins yield time (usecs)', 'log: total log buffer size', 'LSM: application work units currently queued', 'LSM: merge work units currently queued', @@ -145,6 +146,7 @@ no_clear_list = [ 'log: log sync_dir time duration (usecs)', 'log: maximum log file size', 'log: number of pre-allocated log files to create', + 'log: slot joins yield time (usecs)', 'log: total log buffer size', 'LSM: application work units currently queued', 'LSM: merge work units currently queued', -- cgit v1.2.1 From ba5cccea14a2afae8dc62765735be809908e0e39 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Sat, 15 Apr 2017 06:01:02 +1000 Subject: WT-3160 Queue empty internal pages for eviction. (#3357) Also reduce the skew against internal pages by only queuing internal pages when we get aggressive or when a tree is idle. 
--- src/bloom/bloom.c | 8 ++++++-- src/evict/evict_lru.c | 44 +++++++++++++++++++++++++++----------------- src/include/cache.h | 6 +----- src/meta/meta_table.c | 3 ++- 4 files changed, 36 insertions(+), 25 deletions(-) diff --git a/src/bloom/bloom.c b/src/bloom/bloom.c index 3a1e861fb5d..bfbfa34078f 100644 --- a/src/bloom/bloom.c +++ b/src/bloom/bloom.c @@ -133,8 +133,12 @@ __bloom_open_cursor(WT_BLOOM *bloom, WT_CURSOR *owner) c = NULL; WT_RET(__wt_open_cursor(session, bloom->uri, owner, cfg, &c)); - /* Bump the cache priority for Bloom filters. */ - __wt_evict_priority_set(session, WT_EVICT_INT_SKEW); + /* + * Bump the cache priority for Bloom filters: this makes eviction favor + * pages from other trees over Bloom filters. + */ +#define WT_EVICT_BLOOM_SKEW 1000 + __wt_evict_priority_set(session, WT_EVICT_BLOOM_SKEW); bloom->c = c; return (0); diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 7ad9f377809..041e557ef78 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -95,12 +95,8 @@ __evict_entry_priority(WT_SESSION_IMPL *session, WT_REF *ref) if (page->read_gen == WT_READGEN_OLDEST) return (WT_READGEN_OLDEST); - /* - * Any leaf page from a dead tree is a great choice (not internal pages, - * they may have children and are not yet evictable). - */ - if (!WT_PAGE_IS_INTERNAL(page) && - F_ISSET(btree->dhandle, WT_DHANDLE_DEAD)) + /* Any page from a dead tree is a great choice. */ + if (F_ISSET(btree->dhandle, WT_DHANDLE_DEAD)) return (WT_READGEN_OLDEST); /* Any empty page (leaf or internal), is a good choice. 
*/ @@ -123,8 +119,10 @@ __evict_entry_priority(WT_SESSION_IMPL *session, WT_REF *ref) read_gen = page->read_gen; read_gen += btree->evict_priority; + +#define WT_EVICT_INTL_SKEW 1000 if (WT_PAGE_IS_INTERNAL(page)) - read_gen += WT_EVICT_INT_SKEW; + read_gen += WT_EVICT_INTL_SKEW; return (read_gen); } @@ -1580,7 +1578,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_EVICT_ENTRY *end, *evict, *start; - WT_PAGE *page; + WT_PAGE *last_parent, *page; WT_PAGE_MODIFY *mod; WT_REF *ref; WT_TXN_GLOBAL *txn_global; @@ -1588,14 +1586,15 @@ __evict_walk_file(WT_SESSION_IMPL *session, uint64_t pages_seen, pages_queued, refs_walked; uint32_t remaining_slots, total_slots, walk_flags; uint32_t target_pages_clean, target_pages_dirty, target_pages; - int internal_pages, restarts; + int restarts; bool give_up, modified, urgent_queued; conn = S2C(session); btree = S2BT(session); cache = conn->cache; txn_global = &conn->txn_global; - internal_pages = restarts = 0; + last_parent = NULL; + restarts = 0; give_up = urgent_queued = false; /* @@ -1750,6 +1749,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, */ for (evict = start, pages_queued = pages_seen = refs_walked = 0; evict < end && (ret == 0 || ret == WT_NOTFOUND); + last_parent = ref == NULL ? NULL : ref->home, ret = __wt_tree_walk_count( session, &ref, &refs_walked, walk_flags)) { /* @@ -1830,10 +1830,23 @@ __evict_walk_file(WT_SESSION_IMPL *session, if (modified && !F_ISSET(cache, WT_CACHE_EVICT_DIRTY)) continue; - /* Limit internal pages to 50% of the total. */ - if (WT_PAGE_IS_INTERNAL(page) && - internal_pages > (int)(evict - start) / 2) - continue; + /* + * Don't attempt eviction of internal pages with children in + * cache (indicated by seeing an internal page that is the + * parent of the last page we saw). + * + * Also skip internal page unless we get aggressive or the tree + * is idle (indicated by the tree being skipped for walks). 
+ * The goal here is that if trees become completely idle, we + * eventually push them out of cache completely. + */ + if (WT_PAGE_IS_INTERNAL(page)) { + if (page == last_parent) + continue; + if (btree->evict_walk_period == 0 && + !__wt_cache_aggressive(session)) + continue; + } /* If eviction gets aggressive, anything else is fair game. */ if (__wt_cache_aggressive(session)) @@ -1862,9 +1875,6 @@ fast: /* If the page can't be evicted, give up. */ ++evict; ++pages_queued; - if (WT_PAGE_IS_INTERNAL(page)) - ++internal_pages; - __wt_verbose(session, WT_VERB_EVICTSERVER, "select: %p, size %" WT_SIZET_FMT, (void *)page, page->memory_footprint); diff --git a/src/include/cache.h b/src/include/cache.h index 6e79c2a5868..8f439599eca 100644 --- a/src/include/cache.h +++ b/src/include/cache.h @@ -10,14 +10,10 @@ * Tuning constants: I hesitate to call this tuning, but we want to review some * number of pages from each file's in-memory tree for each page we evict. */ -#define WT_EVICT_INT_SKEW (1<<20) /* Prefer leaf pages over internal - pages by this many increments of the - read generation. */ +#define WT_EVICT_MAX_TREES 1000 /* Maximum walk points */ #define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */ #define WT_EVICT_WALK_INCR 100 /* Pages added each walk */ -#define WT_EVICT_MAX_TREES 1000 /* Maximum walk points */ - /* Ways to position when starting an eviction walk. */ typedef enum { WT_EVICT_WALK_NEXT, diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c index 827a440073a..65835a16c8b 100644 --- a/src/meta/meta_table.c +++ b/src/meta/meta_table.c @@ -62,9 +62,10 @@ __wt_metadata_cursor_open( * first update is safe because it's single-threaded from * wiredtiger_open). 
*/ +#define WT_EVICT_META_SKEW 10000 if (btree->evict_priority == 0) WT_WITH_BTREE(session, btree, - __wt_evict_priority_set(session, WT_EVICT_INT_SKEW)); + __wt_evict_priority_set(session, WT_EVICT_META_SKEW)); if (F_ISSET(btree, WT_BTREE_NO_LOGGING)) F_CLR(btree, WT_BTREE_NO_LOGGING); -- cgit v1.2.1 From 9e1fd9bbc2290be4a35c4a29a9f76133c97de0a9 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 18 Apr 2017 02:23:57 -0400 Subject: WT-3268 Failure to close cursor can get wiredtiger stuck in a cursor-close loop (#3378) Traversing lists to close handles could turn into an infinite loop on error, the underlying close functions could return without unlinking the cursor from its linked list. Add a macro pair, `WT_TAILQ_SAFE_REMOVE_BEGIN/END`, that include a test that if we see the same element twice on a linked list, we remove it so we don't loop forever. Clean up various loops that remove elements from lists to either use the standard `TAILQ_FOREACH_SAFE`, the new macro or a pattern where the `TAILQ_REMOVE` is explicit in the loop. 
--- examples/c/ex_file_system.c | 4 +-- ext/test/fail_fs/fail_fs.c | 4 +-- src/async/async_api.c | 8 ++--- src/async/async_worker.c | 8 ++--- src/conn/conn_api.c | 24 +++++++------- src/conn/conn_dhandle.c | 5 +-- src/conn/conn_sweep.c | 7 ++--- src/include/misc.h | 19 +++++++++++ src/lsm/lsm_manager.c | 16 +++------- src/lsm/lsm_tree.c | 7 +++-- src/os_common/os_fhandle.c | 71 ++++++++++++++++++++++++------------------ src/os_common/os_fs_inmemory.c | 19 ++++++----- src/schema/schema_list.c | 6 ++-- src/session/session_api.c | 6 ++-- src/session/session_dhandle.c | 14 ++++----- 15 files changed, 119 insertions(+), 99 deletions(-) diff --git a/examples/c/ex_file_system.c b/examples/c/ex_file_system.c index 40f1d66cbc1..e454d228c39 100644 --- a/examples/c/ex_file_system.c +++ b/examples/c/ex_file_system.c @@ -583,13 +583,13 @@ demo_fs_size(WT_FILE_SYSTEM *file_system, static int demo_fs_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *session) { - DEMO_FILE_HANDLE *demo_fh; + DEMO_FILE_HANDLE *demo_fh, *demo_fh_tmp; DEMO_FILE_SYSTEM *demo_fs; int ret = 0, tret; demo_fs = (DEMO_FILE_SYSTEM *)file_system; - while ((demo_fh = TAILQ_FIRST(&demo_fs->fileq)) != NULL) + TAILQ_FOREACH_SAFE(demo_fh, &demo_fs->fileq, q, demo_fh_tmp) if ((tret = demo_handle_remove(session, demo_fh)) != 0 && ret == 0) ret = tret; diff --git a/ext/test/fail_fs/fail_fs.c b/ext/test/fail_fs/fail_fs.c index b4add92be94..fd01ec66c68 100644 --- a/ext/test/fail_fs/fail_fs.c +++ b/ext/test/fail_fs/fail_fs.c @@ -740,12 +740,12 @@ fail_fs_size(WT_FILE_SYSTEM *file_system, static int fail_fs_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *session) { - FAIL_FILE_HANDLE *fail_fh; + FAIL_FILE_HANDLE *fail_fh, *fail_fh_tmp; FAIL_FILE_SYSTEM *fail_fs; fail_fs = (FAIL_FILE_SYSTEM *)file_system; - while ((fail_fh = TAILQ_FIRST(&fail_fs->fileq)) != NULL) + TAILQ_FOREACH_SAFE(fail_fh, &fail_fs->fileq, q, fail_fh_tmp) fail_file_handle_remove(session, fail_fh); fail_fs_destroy_lock(&fail_fs->lock); diff --git 
a/src/async/async_api.c b/src/async/async_api.c index ef3af8d15d3..e4943e61ed4 100644 --- a/src/async/async_api.c +++ b/src/async/async_api.c @@ -420,7 +420,7 @@ int __wt_async_destroy(WT_SESSION_IMPL *session) { WT_ASYNC *async; - WT_ASYNC_FORMAT *af, *afnext; + WT_ASYNC_FORMAT *af; WT_ASYNC_OP *op; WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -459,15 +459,13 @@ __wt_async_destroy(WT_SESSION_IMPL *session) } /* Free format resources */ - af = TAILQ_FIRST(&async->formatqh); - while (af != NULL) { - afnext = TAILQ_NEXT(af, q); + while ((af = TAILQ_FIRST(&async->formatqh)) != NULL) { + TAILQ_REMOVE(&async->formatqh, af, q); __wt_free(session, af->uri); __wt_free(session, af->config); __wt_free(session, af->key_format); __wt_free(session, af->value_format); __wt_free(session, af); - af = afnext; } __wt_free(session, async->async_queue); __wt_free(session, async->async_ops); diff --git a/src/async/async_worker.c b/src/async/async_worker.c index 2390d9e47cd..ff00ca1c9d1 100644 --- a/src/async/async_worker.c +++ b/src/async/async_worker.c @@ -282,7 +282,7 @@ WT_THREAD_RET __wt_async_worker(void *arg) { WT_ASYNC *async; - WT_ASYNC_CURSOR *ac, *acnext; + WT_ASYNC_CURSOR *ac; WT_ASYNC_OP_IMPL *op; WT_ASYNC_WORKER_STATE worker; WT_CONNECTION_IMPL *conn; @@ -341,12 +341,10 @@ err: WT_PANIC_MSG(session, ret, "async worker error"); * Worker thread cleanup, close our cached cursors and free all the * WT_ASYNC_CURSOR structures. 
*/ - ac = TAILQ_FIRST(&worker.cursorqh); - while (ac != NULL) { - acnext = TAILQ_NEXT(ac, q); + while ((ac = TAILQ_FIRST(&worker.cursorqh)) != NULL) { + TAILQ_REMOVE(&worker.cursorqh, ac, q); WT_TRET(ac->c->close(ac->c)); __wt_free(session, ac); - ac = acnext; } return (WT_THREAD_RET_VALUE); } diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 44333ceec3f..c0a1f5c0920 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -175,13 +175,13 @@ __wt_conn_remove_collator(WT_SESSION_IMPL *session) conn = S2C(session); while ((ncoll = TAILQ_FIRST(&conn->collqh)) != NULL) { + /* Remove from the connection's list, free memory. */ + TAILQ_REMOVE(&conn->collqh, ncoll, q); /* Call any termination method. */ if (ncoll->collator->terminate != NULL) WT_TRET(ncoll->collator->terminate( ncoll->collator, (WT_SESSION *)session)); - /* Remove from the connection's list, free memory. */ - TAILQ_REMOVE(&conn->collqh, ncoll, q); __wt_free(session, ncoll->name); __wt_free(session, ncoll); } @@ -281,13 +281,13 @@ __wt_conn_remove_compressor(WT_SESSION_IMPL *session) conn = S2C(session); while ((ncomp = TAILQ_FIRST(&conn->compqh)) != NULL) { + /* Remove from the connection's list, free memory. */ + TAILQ_REMOVE(&conn->compqh, ncomp, q); /* Call any termination method. */ if (ncomp->compressor->terminate != NULL) WT_TRET(ncomp->compressor->terminate( ncomp->compressor, (WT_SESSION *)session)); - /* Remove from the connection's list, free memory. */ - TAILQ_REMOVE(&conn->compqh, ncomp, q); __wt_free(session, ncomp->name); __wt_free(session, ncomp); } @@ -346,13 +346,13 @@ __wt_conn_remove_data_source(WT_SESSION_IMPL *session) conn = S2C(session); while ((ndsrc = TAILQ_FIRST(&conn->dsrcqh)) != NULL) { + /* Remove from the connection's list, free memory. */ + TAILQ_REMOVE(&conn->dsrcqh, ndsrc, q); /* Call any termination method. 
*/ if (ndsrc->dsrc->terminate != NULL) WT_TRET(ndsrc->dsrc->terminate( ndsrc->dsrc, (WT_SESSION *)session)); - /* Remove from the connection's list, free memory. */ - TAILQ_REMOVE(&conn->dsrcqh, ndsrc, q); __wt_free(session, ndsrc->prefix); __wt_free(session, ndsrc); } @@ -536,14 +536,16 @@ __wt_conn_remove_encryptor(WT_SESSION_IMPL *session) conn = S2C(session); while ((nenc = TAILQ_FIRST(&conn->encryptqh)) != NULL) { + /* Remove from the connection's list, free memory. */ + TAILQ_REMOVE(&conn->encryptqh, nenc, q); while ((kenc = TAILQ_FIRST(&nenc->keyedqh)) != NULL) { + /* Remove from the connection's list, free memory. */ + TAILQ_REMOVE(&nenc->keyedqh, kenc, q); /* Call any termination method. */ if (kenc->owned && kenc->encryptor->terminate != NULL) WT_TRET(kenc->encryptor->terminate( kenc->encryptor, (WT_SESSION *)session)); - /* Remove from the connection's list, free memory. */ - TAILQ_REMOVE(&nenc->keyedqh, kenc, q); __wt_free(session, kenc->keyid); __wt_free(session, kenc); } @@ -553,8 +555,6 @@ __wt_conn_remove_encryptor(WT_SESSION_IMPL *session) WT_TRET(nenc->encryptor->terminate( nenc->encryptor, (WT_SESSION *)session)); - /* Remove from the connection's list, free memory. */ - TAILQ_REMOVE(&conn->encryptqh, nenc, q); __wt_free(session, nenc->name); __wt_free(session, nenc); } @@ -680,13 +680,13 @@ __wt_conn_remove_extractor(WT_SESSION_IMPL *session) conn = S2C(session); while ((nextractor = TAILQ_FIRST(&conn->extractorqh)) != NULL) { + /* Remove from the connection's list, free memory. */ + TAILQ_REMOVE(&conn->extractorqh, nextractor, q); /* Call any termination method. */ if (nextractor->extractor->terminate != NULL) WT_TRET(nextractor->extractor->terminate( nextractor->extractor, (WT_SESSION *)session)); - /* Remove from the connection's list, free memory. 
*/ - TAILQ_REMOVE(&conn->extractorqh, nextractor, q); __wt_free(session, nextractor->name); __wt_free(session, nextractor); } diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 4b4f4b8bc3f..4a653dc4c8f 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -634,7 +634,7 @@ int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; - WT_DATA_HANDLE *dhandle; + WT_DATA_HANDLE *dhandle, *dhandle_tmp; WT_DECL_RET; conn = S2C(session); @@ -680,10 +680,11 @@ restart: WT_TRET(session->meta_cursor->close(session->meta_cursor)); /* Close the metadata file handle. */ - while ((dhandle = TAILQ_FIRST(&conn->dhqh)) != NULL) + WT_TAILQ_SAFE_REMOVE_BEGIN(dhandle, &conn->dhqh, q, dhandle_tmp) { WT_WITH_DHANDLE(session, dhandle, WT_TRET(__wt_conn_dhandle_discard_single( session, true, F_ISSET(conn, WT_CONN_IN_MEMORY)))); + } WT_TAILQ_SAFE_REMOVE_END return (ret); } diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index 4ad4050dd9e..fbedb938bd8 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -219,15 +219,12 @@ static int __sweep_remove_handles(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; - WT_DATA_HANDLE *dhandle, *dhandle_next; + WT_DATA_HANDLE *dhandle, *dhandle_tmp; WT_DECL_RET; conn = S2C(session); - for (dhandle = TAILQ_FIRST(&conn->dhqh); - dhandle != NULL; - dhandle = dhandle_next) { - dhandle_next = TAILQ_NEXT(dhandle, q); + TAILQ_FOREACH_SAFE(dhandle, &conn->dhqh, q, dhandle_tmp) { if (WT_IS_METADATA(dhandle)) continue; if (!WT_DHANDLE_CAN_DISCARD(dhandle)) diff --git a/src/include/misc.h b/src/include/misc.h index c982b74a858..c84368b235c 100644 --- a/src/include/misc.h +++ b/src/include/misc.h @@ -276,3 +276,22 @@ union __wt_rand_state { uint32_t w, z; } x; }; + +/* + * WT_TAILQ_SAFE_REMOVE_BEGIN/END -- + * Macro to safely walk a TAILQ where we're expecting some underlying + * function to remove elements from the list, but we don't want to stop on + * error, nor do we want an 
error to turn into an infinite loop. Used during + * shutdown, when we're shutting down various lists. Unlike TAILQ_FOREACH_SAFE, + * this macro works even when the next element gets removed along with the + * current one. + */ +#define WT_TAILQ_SAFE_REMOVE_BEGIN(var, head, field, tvar) \ + for ((tvar) = NULL; ((var) = TAILQ_FIRST(head)) != NULL; \ + (tvar) = (var)) { \ + if ((tvar) == (var)) { \ + /* Leak the structure. */ \ + TAILQ_REMOVE(head, (var), field); \ + continue; \ + } +#define WT_TAILQ_SAFE_REMOVE_END } diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 82a6fc8f86c..88b3e1980be 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -500,7 +500,7 @@ void __wt_lsm_manager_clear_tree(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) { WT_LSM_MANAGER *manager; - WT_LSM_WORK_UNIT *current, *next; + WT_LSM_WORK_UNIT *current, *tmp; uint64_t removed; manager = &S2C(session)->lsm_manager; @@ -508,11 +508,7 @@ __wt_lsm_manager_clear_tree(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) /* Clear out the tree from the switch queue */ __wt_spin_lock(session, &manager->switch_lock); - - /* Structure the loop so that it's safe to free as we iterate */ - for (current = TAILQ_FIRST(&manager->switchqh); - current != NULL; current = next) { - next = TAILQ_NEXT(current, q); + TAILQ_FOREACH_SAFE(current, &manager->switchqh, q, tmp) { if (current->lsm_tree != lsm_tree) continue; ++removed; @@ -522,9 +518,7 @@ __wt_lsm_manager_clear_tree(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) __wt_spin_unlock(session, &manager->switch_lock); /* Clear out the tree from the application queue */ __wt_spin_lock(session, &manager->app_lock); - for (current = TAILQ_FIRST(&manager->appqh); - current != NULL; current = next) { - next = TAILQ_NEXT(current, q); + TAILQ_FOREACH_SAFE(current, &manager->appqh, q, tmp) { if (current->lsm_tree != lsm_tree) continue; ++removed; @@ -534,9 +528,7 @@ __wt_lsm_manager_clear_tree(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) 
__wt_spin_unlock(session, &manager->app_lock); /* Clear out the tree from the manager queue */ __wt_spin_lock(session, &manager->manager_lock); - for (current = TAILQ_FIRST(&manager->managerqh); - current != NULL; current = next) { - next = TAILQ_NEXT(current, q); + TAILQ_FOREACH_SAFE(current, &manager->managerqh, q, tmp) { if (current->lsm_tree != lsm_tree) continue; ++removed; diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index d9c7a7d7284..fb8eb9d38a7 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -134,11 +134,12 @@ int __wt_lsm_tree_close_all(WT_SESSION_IMPL *session) { WT_DECL_RET; - WT_LSM_TREE *lsm_tree; + WT_LSM_TREE *lsm_tree, *lsm_tree_tmp; /* We are shutting down: the handle list lock isn't required. */ - while ((lsm_tree = TAILQ_FIRST(&S2C(session)->lsmqh)) != NULL) { + WT_TAILQ_SAFE_REMOVE_BEGIN(lsm_tree, + &S2C(session)->lsmqh, q, lsm_tree_tmp) { /* * Tree close assumes that we have a reference to the tree * so it can tell when it's safe to do the close. We could @@ -149,7 +150,7 @@ __wt_lsm_tree_close_all(WT_SESSION_IMPL *session) (void)__wt_atomic_add32(&lsm_tree->refcnt, 1); __lsm_tree_close(session, lsm_tree, true); WT_TRET(__lsm_tree_discard(session, lsm_tree, true)); - } + } WT_TAILQ_SAFE_REMOVE_END return (ret); } diff --git a/src/os_common/os_fhandle.c b/src/os_common/os_fhandle.c index 2a67447f8d2..69a01b41d14 100644 --- a/src/os_common/os_fhandle.c +++ b/src/os_common/os_fhandle.c @@ -280,6 +280,41 @@ err: if (open_called) return (ret); } +/* + * __handle_close -- + * Final close of a handle. + */ +static int +__handle_close(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + uint64_t bucket; + + conn = S2C(session); + + if (fh->ref != 0) { + __wt_errx(session, + "Closing a file handle with open references: %s", fh->name); + WT_TRET(EBUSY); + } + + /* Remove from the list. 
*/ + bucket = fh->name_hash % WT_HASH_ARRAY_SIZE; + WT_FILE_HANDLE_REMOVE(conn, fh, bucket); + (void)__wt_atomic_sub32(&conn->open_file_count, 1); + + __wt_spin_unlock(session, &conn->fh_lock); + + /* Discard underlying resources. */ + WT_TRET(fh->handle->close(fh->handle, (WT_SESSION *)session)); + + __wt_free(session, fh->name); + __wt_free(session, fh); + + return (ret); +} + /* * __wt_close -- * Close a file handle. @@ -288,9 +323,7 @@ int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) { WT_CONNECTION_IMPL *conn; - WT_DECL_RET; WT_FH *fh; - uint64_t bucket; conn = S2C(session); @@ -315,20 +348,7 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) return (0); } - /* Remove from the list. */ - bucket = fh->name_hash % WT_HASH_ARRAY_SIZE; - WT_FILE_HANDLE_REMOVE(conn, fh, bucket); - (void)__wt_atomic_sub32(&conn->open_file_count, 1); - - __wt_spin_unlock(session, &conn->fh_lock); - - /* Discard underlying resources. */ - ret = fh->handle->close(fh->handle, (WT_SESSION *)session); - - __wt_free(session, fh->name); - __wt_free(session, fh); - - return (ret); + return (__handle_close(session, fh)); } /* @@ -339,21 +359,10 @@ int __wt_close_connection_close(WT_SESSION_IMPL *session) { WT_DECL_RET; - WT_FH *fh; - WT_CONNECTION_IMPL *conn; + WT_FH *fh, *fh_tmp; - conn = S2C(session); - - while ((fh = TAILQ_FIRST(&conn->fhqh)) != NULL) { - if (fh->ref != 0) { - ret = EBUSY; - __wt_errx(session, - "Connection has open file handles: %s", fh->name); - } - - fh->ref = 1; - - WT_TRET(__wt_close(session, &fh)); - } + WT_TAILQ_SAFE_REMOVE_BEGIN(fh, &S2C(session)->fhqh, q, fh_tmp) { + WT_TRET(__handle_close(session, fh)); + } WT_TAILQ_SAFE_REMOVE_END return (ret); } diff --git a/src/os_common/os_fs_inmemory.c b/src/os_common/os_fs_inmemory.c index 3ea25530aef..e669ea2802d 100644 --- a/src/os_common/os_fs_inmemory.c +++ b/src/os_common/os_fs_inmemory.c @@ -52,7 +52,7 @@ __im_handle_search(WT_FILE_SYSTEM *file_system, const char *name) */ static int 
__im_handle_remove(WT_SESSION_IMPL *session, - WT_FILE_SYSTEM *file_system, WT_FILE_HANDLE_INMEM *im_fh) + WT_FILE_SYSTEM *file_system, WT_FILE_HANDLE_INMEM *im_fh, bool force) { WT_FILE_HANDLE *fhp; WT_FILE_SYSTEM_INMEM *im_fs; @@ -60,9 +60,11 @@ __im_handle_remove(WT_SESSION_IMPL *session, im_fs = (WT_FILE_SYSTEM_INMEM *)file_system; - if (im_fh->ref != 0) - WT_RET_MSG(session, EBUSY, - "%s: file-remove", im_fh->iface.name); + if (im_fh->ref != 0) { + __wt_err(session, EBUSY, "%s: file-remove", im_fh->iface.name); + if (!force) + return (EBUSY); + } bucket = im_fh->name_hash % WT_HASH_ARRAY_SIZE; WT_FILE_HANDLE_REMOVE(im_fs, im_fh, bucket); @@ -205,7 +207,7 @@ __im_fs_remove(WT_FILE_SYSTEM *file_system, ret = ENOENT; if ((im_fh = __im_handle_search(file_system, name)) != NULL) - ret = __im_handle_remove(session, file_system, im_fh); + ret = __im_handle_remove(session, file_system, im_fh, false); __wt_spin_unlock(session, &im_fs->lock); return (ret); @@ -511,15 +513,16 @@ static int __im_terminate(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session) { WT_DECL_RET; - WT_FILE_HANDLE_INMEM *im_fh; + WT_FILE_HANDLE_INMEM *im_fh, *im_fh_tmp; WT_FILE_SYSTEM_INMEM *im_fs; WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; im_fs = (WT_FILE_SYSTEM_INMEM *)file_system; - while ((im_fh = TAILQ_FIRST(&im_fs->fhqh)) != NULL) - WT_TRET(__im_handle_remove(session, file_system, im_fh)); + WT_TAILQ_SAFE_REMOVE_BEGIN(im_fh, &im_fs->fhqh, q, im_fh_tmp) { + WT_TRET(__im_handle_remove(session, file_system, im_fh, true)); + } WT_TAILQ_SAFE_REMOVE_END __wt_spin_destroy(session, &im_fs->lock); __wt_free(session, im_fs); diff --git a/src/schema/schema_list.c b/src/schema/schema_list.c index bbdc3568af3..20e65d5acc9 100644 --- a/src/schema/schema_list.c +++ b/src/schema/schema_list.c @@ -244,9 +244,11 @@ int __wt_schema_close_tables(WT_SESSION_IMPL *session) { WT_DECL_RET; - WT_TABLE *table; + WT_TABLE *table, *table_tmp; - while ((table = TAILQ_FIRST(&session->tables)) != 
NULL) + WT_TAILQ_SAFE_REMOVE_BEGIN(table, &session->tables, q, table_tmp) { WT_TRET(__wt_schema_remove_table(session, table)); + } WT_TAILQ_SAFE_REMOVE_END + return (ret); } diff --git a/src/session/session_api.c b/src/session/session_api.c index 21f03915e2a..3c01dadc48c 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -183,7 +183,7 @@ static int __session_close(WT_SESSION *wt_session, const char *config) { WT_CONNECTION_IMPL *conn; - WT_CURSOR *cursor; + WT_CURSOR *cursor, *cursor_tmp; WT_DECL_RET; WT_SESSION_IMPL *session; @@ -205,7 +205,7 @@ __session_close(WT_SESSION *wt_session, const char *config) __wt_txn_release_snapshot(session); /* Close all open cursors. */ - while ((cursor = TAILQ_FIRST(&session->cursors)) != NULL) { + WT_TAILQ_SAFE_REMOVE_BEGIN(cursor, &session->cursors, q, cursor_tmp) { /* * Notify the user that we are closing the cursor handle * via the registered close callback. @@ -215,7 +215,7 @@ __session_close(WT_SESSION *wt_session, const char *config) WT_TRET(session->event_handler->handle_close( session->event_handler, wt_session, cursor)); WT_TRET(cursor->close(cursor)); - } + } WT_TAILQ_SAFE_REMOVE_END WT_ASSERT(session, session->ncursors == 0); diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c index 3cfbfcead36..2d0a2eeb2dc 100644 --- a/src/session/session_dhandle.c +++ b/src/session/session_dhandle.c @@ -369,10 +369,12 @@ retry: WT_RET(__wt_meta_checkpoint_last_name( void __wt_session_close_cache(WT_SESSION_IMPL *session) { - WT_DATA_HANDLE_CACHE *dhandle_cache; + WT_DATA_HANDLE_CACHE *dhandle_cache, *dhandle_cache_tmp; - while ((dhandle_cache = TAILQ_FIRST(&session->dhandles)) != NULL) + WT_TAILQ_SAFE_REMOVE_BEGIN(dhandle_cache, + &session->dhandles, q, dhandle_cache_tmp) { __session_discard_dhandle(session, dhandle_cache); + } WT_TAILQ_SAFE_REMOVE_END } /* @@ -384,7 +386,7 @@ __session_dhandle_sweep(WT_SESSION_IMPL *session) { WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle; - 
WT_DATA_HANDLE_CACHE *dhandle_cache, *dhandle_cache_next; + WT_DATA_HANDLE_CACHE *dhandle_cache, *dhandle_cache_tmp; time_t now; conn = S2C(session); @@ -400,9 +402,8 @@ __session_dhandle_sweep(WT_SESSION_IMPL *session) WT_STAT_CONN_INCR(session, dh_session_sweeps); - dhandle_cache = TAILQ_FIRST(&session->dhandles); - while (dhandle_cache != NULL) { - dhandle_cache_next = TAILQ_NEXT(dhandle_cache, q); + TAILQ_FOREACH_SAFE(dhandle_cache, + &session->dhandles, q, dhandle_cache_tmp) { dhandle = dhandle_cache->dhandle; if (dhandle != session->dhandle && dhandle->session_inuse == 0 && @@ -414,7 +415,6 @@ __session_dhandle_sweep(WT_SESSION_IMPL *session) WT_ASSERT(session, !WT_IS_METADATA(dhandle)); __session_discard_dhandle(session, dhandle_cache); } - dhandle_cache = dhandle_cache_next; } } -- cgit v1.2.1 From 095efd2952d6c957bf468b8180ed4f495e4d7a08 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 18 Apr 2017 02:40:08 -0400 Subject: WT-3284 tree-walk restart bug (#3389) The tree-walk restart code (that is, dealing with WT_RESTART being returned from a page-coupling attempt after a page split), depends on the tree-walk function never hazard coupling from a leaf page, that is, any leaf page it acquires must be returned to the caller. The "skip leaf page during a tree walk" functionality violated that requirement. Rather than rework the tree walk code's restart handling, move half of the functionality to skip a count of leaf pages into the tree-walk function's caller, essentially, it's an easy way to jump back into the tree-walk code at the right place. 
--- dist/flags.py | 1 - src/btree/bt_random.c | 3 +- src/btree/bt_walk.c | 99 +++++++++++++++++++++++++-------------------------- src/include/extern.h | 2 +- src/include/flags.h | 5 ++- 5 files changed, 52 insertions(+), 58 deletions(-) diff --git a/dist/flags.py b/dist/flags.py index 64b5d789e72..d80c80a37ce 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -32,7 +32,6 @@ flags = { 'READ_PREV', 'READ_RESTART_OK', 'READ_SKIP_INTL', - 'READ_SKIP_LEAF', 'READ_TRUNCATE', 'READ_WONT_NEED', ], diff --git a/src/btree/bt_random.c b/src/btree/bt_random.c index 90780b05cab..1bdf0fd1c8b 100644 --- a/src/btree/bt_random.c +++ b/src/btree/bt_random.c @@ -395,8 +395,7 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) */ for (skip = cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;) { n = skip; - WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip, - WT_READ_NO_GEN | WT_READ_SKIP_INTL | WT_READ_WONT_NEED)); + WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip)); if (n == skip) { if (skip == 0) break; diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c index 8432707fc31..225e6812aa1 100644 --- a/src/btree/bt_walk.c +++ b/src/btree/bt_walk.c @@ -497,29 +497,21 @@ restart: /* } /* - * Optionally skip leaf pages: skip all leaf pages if - * WT_READ_SKIP_LEAF is set, when the skip-leaf-count - * variable is non-zero, skip some count of leaf pages. - * If this page is disk-based, crack the cell to figure - * out it's a leaf page without reading it. + * Optionally skip leaf pages: when the skip-leaf-count + * variable is non-zero, skip some count of leaf pages, + * then take the next leaf page we can. * - * If skipping some number of leaf pages, decrement the - * count of pages to zero, and then take the next leaf - * page we can. Be cautious around the page decrement, - * if for some reason don't take this particular page, - * we can take the next one, and, there are additional - * tests/decrements when we're about to return a leaf - * page. 
+ * The reason to do some of this work here (rather than + * in our caller), is because we can look at the cell + * and know it's a leaf page without reading it into + * memory. If this page is disk-based, crack the cell + * to figure out it's a leaf page without reading it. */ - if (skipleafcntp != NULL || LF_ISSET(WT_READ_SKIP_LEAF)) - if (__ref_is_leaf(ref)) { - if (LF_ISSET(WT_READ_SKIP_LEAF)) - break; - if (*skipleafcntp > 0) { - --*skipleafcntp; - break; - } - } + if (skipleafcntp != NULL && + *skipleafcntp > 0 && __ref_is_leaf(ref)) { + --*skipleafcntp; + break; + } ret = __wt_page_swap(session, couple, ref, WT_READ_NOTFOUND_OK | WT_READ_RESTART_OK | flags); @@ -626,34 +618,18 @@ descend: empty_internal = true; session, ref, &pindex); slot = pindex->entries - 1; } - } else { - /* - * At the lowest tree level (considering a leaf - * page), turn off the initial-descent state. - * Descent race tests are different when moving - * through the tree vs. the initial descent. - */ - initial_descent = false; - - /* - * Optionally skip leaf pages, the second half. - * We didn't have an on-page cell to figure out - * if it was a leaf page, we had to acquire the - * hazard pointer and look at the page. - */ - if (skipleafcntp != NULL || - LF_ISSET(WT_READ_SKIP_LEAF)) { - if (LF_ISSET(WT_READ_SKIP_LEAF)) - break; - if (*skipleafcntp > 0) { - --*skipleafcntp; - break; - } - } - - *refp = ref; - goto done; + continue; } + + /* + * The tree-walk restart code knows we return any leaf + * page we acquire (never hazard-pointer coupling on + * after acquiring a leaf page), and asserts no restart + * happens while holding a leaf page. This page must be + * returned to our caller. + */ + *refp = ref; + goto done; } } @@ -690,8 +666,29 @@ __wt_tree_walk_count(WT_SESSION_IMPL *session, * of leaf pages before returning. 
*/ int -__wt_tree_walk_skip(WT_SESSION_IMPL *session, - WT_REF **refp, uint64_t *skipleafcntp, uint32_t flags) +__wt_tree_walk_skip( + WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp) { - return (__tree_walk_internal(session, refp, NULL, skipleafcntp, flags)); + /* + * Optionally skip leaf pages, the second half. The tree-walk function + * didn't have an on-page cell it could use to figure out if the page + * was a leaf page or not, it had to acquire the hazard pointer and look + * at the page. The tree-walk code never acquires a hazard pointer on a + * leaf page without returning it, and it's not trivial to change that. + * So, the tree-walk code returns all leaf pages here and we deal with + * decrementing the count. + */ + do { + WT_RET(__tree_walk_internal(session, refp, NULL, skipleafcntp, + WT_READ_NO_GEN | WT_READ_SKIP_INTL | WT_READ_WONT_NEED)); + + /* + * The walk skipped internal pages, any page returned must be a + * leaf page. + */ + if (*skipleafcntp > 0) + --*skipleafcntp; + } while (*skipleafcntp > 0); + + return (0); } diff --git a/src/include/extern.h b/src/include/extern.h index 4f38b7ac433..05fafafef63 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -179,7 +179,7 @@ extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag, cons extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_tree_walk_skip(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp, 
uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_tree_walk_skip( WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/include/flags.h b/src/include/flags.h index f26a45c68f5..d7c0e0f9472 100644 --- a/src/include/flags.h +++ b/src/include/flags.h @@ -47,9 +47,8 @@ #define WT_READ_PREV 0x00000080 #define WT_READ_RESTART_OK 0x00000100 #define WT_READ_SKIP_INTL 0x00000200 -#define WT_READ_SKIP_LEAF 0x00000400 -#define WT_READ_TRUNCATE 0x00000800 -#define WT_READ_WONT_NEED 0x00001000 +#define WT_READ_TRUNCATE 0x00000400 +#define WT_READ_WONT_NEED 0x00000800 #define WT_SESSION_CAN_WAIT 0x00000001 #define WT_SESSION_INTERNAL 0x00000002 #define WT_SESSION_LOCKED_CHECKPOINT 0x00000004 -- cgit v1.2.1 From 0bad9147528b3fa7bc6d0af815e3359a3f21f2af Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Tue, 18 Apr 2017 03:21:56 -0400 Subject: WT-3275 Fix a bug in LSM cursor semantics. (#3388) If an update, remove or reserve operation required a search it was possible for a race with eviction to cause a segfault between the lookup and requested operation. 
--- src/lsm/lsm_cursor.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index e9943d24cda..e62d6cab584 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -1530,6 +1530,11 @@ __clsm_insert(WT_CURSOR *cursor) WT_ERR(__cursor_needvalue(cursor)); WT_ERR(__clsm_enter(clsm, false, true)); + /* + * It isn't necessary to copy the key out after the lookup in this + * case because any non-failed lookup results in an error, and a + * failed lookup leaves the original key intact. + */ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && (ret = __clsm_lookup(clsm, &value)) != WT_NOTFOUND) { if (ret == 0) @@ -1574,8 +1579,14 @@ __clsm_update(WT_CURSOR *cursor) WT_ERR(__cursor_needvalue(cursor)); WT_ERR(__clsm_enter(clsm, false, true)); - if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { WT_ERR(__clsm_lookup(clsm, &value)); + /* + * Copy the key out, since the insert resets non-primary chunk + * cursors which our lookup may have landed on. + */ + WT_ERR(__cursor_needkey(cursor)); + } WT_ERR(__clsm_deleted_encode(session, &cursor->value, &value, &buf)); WT_ERR(__clsm_put(session, clsm, &cursor->key, &value, true, false)); @@ -1621,8 +1632,14 @@ __clsm_remove(WT_CURSOR *cursor) __cursor_novalue(cursor); WT_ERR(__clsm_enter(clsm, false, true)); - if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { WT_ERR(__clsm_lookup(clsm, &value)); + /* + * Copy the key out, since the insert resets non-primary chunk + * cursors which our lookup may have landed on. 
+ */ + WT_ERR(__cursor_needkey(cursor)); + } WT_ERR(__clsm_put( session, clsm, &cursor->key, &__tombstone, positioned, false)); @@ -1663,8 +1680,13 @@ __clsm_reserve(WT_CURSOR *cursor) WT_ERR(__wt_txn_context_check(session, true)); WT_ERR(__clsm_enter(clsm, false, true)); - if ((ret = __clsm_lookup(clsm, &value)) == 0) - ret = __clsm_put(session, clsm, &cursor->key, NULL, true, true); + WT_ERR(__clsm_lookup(clsm, &value)); + /* + * Copy the key out, since the insert resets non-primary chunk cursors + * which our lookup may have landed on. + */ + WT_ERR(__cursor_needkey(cursor)); + ret = __clsm_put(session, clsm, &cursor->key, NULL, true, true); err: __clsm_leave(clsm); CURSOR_UPDATE_API_END(session, ret); -- cgit v1.2.1 From 145b457de786695c2b3709356951e44e3d2833b4 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 18 Apr 2017 10:57:22 -0400 Subject: SERVER-28835 Fix a memory leak in WiredTiger on error when creating thread group. (#3394) Coverity analysis defect 101330: Resource leak Discard any allocated thread structure on error. --- src/support/thread_group.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/src/support/thread_group.c b/src/support/thread_group.c index 09ba10097ca..84c836e5627 100644 --- a/src/support/thread_group.c +++ b/src/support/thread_group.c @@ -134,11 +134,13 @@ __thread_group_resize( { WT_CONNECTION_IMPL *conn; WT_DECL_RET; + WT_SESSION *wt_session; WT_THREAD *thread; size_t alloc; uint32_t i, session_flags; conn = S2C(session); + thread = NULL; session_flags = 0; __wt_verbose(session, WT_VERB_THREAD_GROUP, @@ -199,8 +201,6 @@ __thread_group_resize( thread->stop_func = group->stop_func; WT_ERR(__wt_cond_alloc( session, "Thread cond", &thread->pause_cond)); - WT_ASSERT(session, group->threads[i] == NULL); - group->threads[i] = thread; /* * Start thread as inactive. 
We'll activate the needed @@ -210,30 +210,43 @@ __thread_group_resize( "Starting utility thread: %p:%" PRIu32, (void *)group, thread->id); F_SET(thread, WT_THREAD_RUN); - WT_ASSERT(session, thread->session != NULL); WT_ERR(__wt_thread_create(thread->session, &thread->tid, __thread_run, thread)); + + WT_ASSERT(session, group->threads[i] == NULL); + group->threads[i] = thread; + thread = NULL; } + group->max = new_max; + group->min = new_min; + while (group->current_threads < new_min) + __wt_thread_group_start_one(session, group, true); + return (0); + err: /* + * An error resizing a thread array is currently fatal, it should only + * happen in an out of memory situation. Do real cleanup just in case + * that changes in the future. + */ + if (thread != NULL) { + if (thread->session != NULL) { + wt_session = (WT_SESSION *)thread->session; + WT_TRET(wt_session->close(wt_session, NULL)); + } + WT_TRET(__wt_cond_destroy(session, &thread->pause_cond)); + __wt_free(session, thread); + } + + /* * Update the thread group information even on failure to improve our * chances of cleaning up properly. */ group->max = new_max; group->min = new_min; + WT_TRET(__wt_thread_group_destroy(session, group)); - /* - * An error resizing a thread array is fatal, it should only happen - * in an out of memory situation. 
- */ - if (ret == 0) - while (group->current_threads < new_min) - __wt_thread_group_start_one(session, group, true); - else { - WT_TRET(__wt_thread_group_destroy(session, group)); - WT_PANIC_RET(session, ret, "Error while resizing thread group"); - } - return (ret); + WT_PANIC_RET(session, ret, "Error while resizing thread group"); } /* -- cgit v1.2.1 From 35e221c039a0931af5b3a18069e57ba9a218aead Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 18 Apr 2017 14:02:05 -0400 Subject: WT-3287 Remove some WiredTiger internal panic checks (#3391) * API_SESSION_INIT is only called from two places, API_CALL_NOCONF and API_CALL, both of which include identical checks on the connection's panic state and WT_VERB_API calls. Move the check and call into the API_SESSION_INIT macro rather than repeating it. There's a semantic change in that API_CALL will now do the WT_VERB_API call before parsing the configuration, but that looks like a bug fix to me. Minor API_SESSION_INIT cleanup, there's no need to terminate with a semi-colon or backslash escape the last line in the macro. Remove WT_SESSION_CHECK_PANIC() calls only intended to avoid threads hanging inside WiredTiger. 
--- dist/s_void | 4 ++-- src/async/async_worker.c | 1 - src/include/api.h | 10 ++++----- src/include/extern.h | 4 ++-- src/log/log.c | 24 +++++++--------------- src/log/log_slot.c | 9 ++------- test/csuite/wt2909_checkpoint_integrity/main.c | 28 +++++++++++++++++++++++++- 7 files changed, 44 insertions(+), 36 deletions(-) diff --git a/dist/s_void b/dist/s_void index 249f043d029..d7f2c81a211 100755 --- a/dist/s_void +++ b/dist/s_void @@ -88,8 +88,8 @@ func_ok() -e '/int handle_progress$/d' \ -e '/int helium_cursor_reset$/d' \ -e '/int helium_session_verify$/d' \ - -e '/int index_compare_primary$/d' \ -e '/int index_compare_S$/d' \ + -e '/int index_compare_primary$/d' \ -e '/int index_compare_u$/d' \ -e '/int index_extractor_u$/d' \ -e '/int log_print_err$/d' \ @@ -103,7 +103,6 @@ func_ok() -e '/int nop_pre_size$/d' \ -e '/int nop_sizing$/d' \ -e '/int nop_terminate$/d' \ - -e '/int nop_terminate$/d' \ -e '/int os_errno$/d' \ -e '/int revint_terminate$/d' \ -e '/int rotn_error$/d' \ @@ -111,6 +110,7 @@ func_ok() -e '/int rotn_terminate$/d' \ -e '/int snappy_pre_size$/d' \ -e '/int snappy_terminate$/d' \ + -e '/int subtest_error_handler$/d' \ -e '/int uri2name$/d' \ -e '/int usage$/d' \ -e '/int util_err$/d' \ diff --git a/src/async/async_worker.c b/src/async/async_worker.c index ff00ca1c9d1..57ebe5d8bb1 100644 --- a/src/async/async_worker.c +++ b/src/async/async_worker.c @@ -57,7 +57,6 @@ retry: return (0); if (!F_ISSET(conn, WT_CONN_SERVER_ASYNC)) return (0); - WT_RET(WT_SESSION_CHECK_PANIC(session)); WT_ORDERED_READ(last_consume, async->alloc_tail); } if (async->flush_state == WT_ASYNC_FLUSHING) diff --git a/src/include/api.h b/src/include/api.h index 994855e6087..fb0c41fe1c8 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -12,21 +12,19 @@ const char *__oldname = (s)->name; \ (s)->dhandle = (dh); \ (s)->name = (s)->lastop = #h "." 
#n; \ - -#define API_CALL_NOCONF(s, h, n, dh) do { \ - API_SESSION_INIT(s, h, n, dh); \ WT_ERR(WT_SESSION_CHECK_PANIC(s)); \ __wt_verbose((s), WT_VERB_API, "CALL: " #h ":" #n) +#define API_CALL_NOCONF(s, h, n, dh) do { \ + API_SESSION_INIT(s, h, n, dh) + #define API_CALL(s, h, n, dh, config, cfg) do { \ const char *(cfg)[] = \ { WT_CONFIG_BASE(s, h##_##n), config, NULL }; \ API_SESSION_INIT(s, h, n, dh); \ - WT_ERR(WT_SESSION_CHECK_PANIC(s)); \ if ((config) != NULL) \ WT_ERR(__wt_config_check((s), \ - WT_CONFIG_REF(session, h##_##n), (config), 0)); \ - __wt_verbose((s), WT_VERB_API, "CALL: " #h ":" #n) + WT_CONFIG_REF(session, h##_##n), (config), 0)) #define API_END(s, ret) \ if ((s) != NULL) { \ diff --git a/src/include/extern.h b/src/include/extern.h index 05fafafef63..ed40df8280f 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -406,8 +406,8 @@ extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) W extern int __wt_log_slot_switch(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_log_slot_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int64_t __wt_log_slot_release(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, int64_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int64_t __wt_log_slot_release(WT_MYSLOT *myslot, int64_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_clsm_request_switch(WT_CURSOR_LSM *clsm) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_clsm_await_switch(WT_CURSOR_LSM *clsm) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/log/log.c b/src/log/log.c index c0076951e86..f95ad93d872 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -24,7 +24,7 @@ static int __log_write_internal( * __log_wait_for_earlier_slot -- * Wait for write_lsn to catch up to this slot. */ -static int +static void __log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) { WT_CONNECTION_IMPL *conn; @@ -41,7 +41,6 @@ __log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) * unlock in case an earlier thread is trying to switch its * slot and complete its operation. */ - WT_RET(WT_SESSION_CHECK_PANIC(session)); if (F_ISSET(session, WT_SESSION_LOCKED_SLOT)) __wt_spin_unlock(session, &log->log_slot_lock); __wt_cond_signal(session, conn->log_wrlsn_cond); @@ -52,7 +51,6 @@ __log_wait_for_earlier_slot(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) if (F_ISSET(session, WT_SESSION_LOCKED_SLOT)) __wt_spin_lock(session, &log->log_slot_lock); } - return (0); } /* @@ -72,7 +70,7 @@ __log_fs_write(WT_SESSION_IMPL *session, * be a hole at the end of the previous log file that we cannot detect. 
*/ if (slot->slot_release_lsn.l.file < slot->slot_start_lsn.l.file) { - WT_RET(__log_wait_for_earlier_slot(session, slot)); + __log_wait_for_earlier_slot(session, slot); WT_RET(__wt_log_force_sync(session, &slot->slot_release_lsn)); } if ((ret = __wt_write(session, slot->slot_fh, offset, len, buf)) != 0) @@ -112,7 +110,6 @@ __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) conn = S2C(session); log = conn->log; - WT_RET(WT_SESSION_CHECK_PANIC(session)); WT_RET(__wt_log_force_write(session, 1, NULL)); __wt_log_wrlsn(session, NULL); if (start) @@ -177,7 +174,6 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) * log file ready to close. */ while (log->sync_lsn.l.file < min_lsn->l.file) { - WT_RET(WT_SESSION_CHECK_PANIC(session)); __wt_cond_signal(session, S2C(session)->log_file_cond); __wt_cond_wait(session, log->log_sync_cond, 10000, NULL); } @@ -1468,7 +1464,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) * be holes in the log file. */ WT_STAT_CONN_INCR(session, log_release_write_lsn); - WT_ERR(__log_wait_for_earlier_slot(session, slot)); + __log_wait_for_earlier_slot(session, slot); log->write_start_lsn = slot->slot_start_lsn; log->write_lsn = slot->slot_end_lsn; @@ -1489,7 +1485,6 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) * current fsync completes and advance log->sync_lsn. */ while (F_ISSET(slot, WT_SLOT_SYNC | WT_SLOT_SYNC_DIR)) { - WT_ERR(WT_SESSION_CHECK_PANIC(session)); /* * We have to wait until earlier log files have finished their * sync operations. The most recent one will set the LSN to the @@ -2129,7 +2124,7 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, * The only time joining a slot should ever return an error is if it * detects a panic. 
*/ - WT_ERR(__wt_log_slot_join(session, rdup_len, flags, &myslot)); + __wt_log_slot_join(session, rdup_len, flags, &myslot); /* * If the addition of this record crosses the buffer boundary, * switch in a new slot. @@ -2141,8 +2136,7 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, ret = __wt_log_slot_switch(session, &myslot, true, false, NULL); if (ret == 0) ret = __log_fill(session, &myslot, false, record, &lsn); - release_size = __wt_log_slot_release( - session, &myslot, (int64_t)rdup_len); + release_size = __wt_log_slot_release(&myslot, (int64_t)rdup_len); /* * If we get an error we still need to do proper accounting in * the slot fields. @@ -2171,19 +2165,15 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, if (LF_ISSET(WT_LOG_FLUSH)) { /* Wait for our writes to reach the OS */ while (__wt_log_cmp(&log->write_lsn, &lsn) <= 0 && - myslot.slot->slot_error == 0) { - WT_ERR(WT_SESSION_CHECK_PANIC(session)); + myslot.slot->slot_error == 0) __wt_cond_wait( session, log->log_write_cond, 10000, NULL); - } } else if (LF_ISSET(WT_LOG_FSYNC)) { /* Wait for our writes to reach disk */ while (__wt_log_cmp(&log->sync_lsn, &lsn) <= 0 && - myslot.slot->slot_error == 0) { - WT_ERR(WT_SESSION_CHECK_PANIC(session)); + myslot.slot->slot_error == 0) __wt_cond_wait( session, log->log_sync_cond, 10000, NULL); - } } /* diff --git a/src/log/log_slot.c b/src/log/log_slot.c index a9655dd903e..444babfda92 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -164,7 +164,6 @@ retry: #endif if (WT_LOG_SLOT_UNBUFFERED_ISSET(old_state)) { while (slot->slot_unbuffered == 0) { - WT_RET(WT_SESSION_CHECK_PANIC(session)); WT_STAT_CONN_INCR(session, log_slot_close_unbuf); __wt_yield(); #ifdef HAVE_DIAGNOSTIC @@ -321,7 +320,6 @@ __log_slot_switch_internal( *did_work = false; return (0); } - WT_RET(WT_SESSION_CHECK_PANIC(session)); /* * We may come through here multiple times if we were not able to @@ -495,7 +493,7 @@ 
__wt_log_slot_destroy(WT_SESSION_IMPL *session) * __wt_log_slot_join -- * Join a consolidated logging slot. */ -int +void __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot) { @@ -533,7 +531,6 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, } for (;;) { WT_BARRIER(); - WT_RET(WT_SESSION_CHECK_PANIC(session)); slot = log->active_slot; old_state = slot->slot_state; if (WT_LOG_SLOT_OPEN(old_state)) { @@ -617,7 +614,6 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, myslot->slot = slot; myslot->offset = join_offset; myslot->end_offset = (wt_off_t)((uint64_t)join_offset + mysize); - return (0); } /* @@ -627,7 +623,7 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, * the memory buffer. */ int64_t -__wt_log_slot_release(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, int64_t size) +__wt_log_slot_release(WT_MYSLOT *myslot, int64_t size) { WT_LOGSLOT *slot; wt_off_t cur_offset, my_start; @@ -641,7 +637,6 @@ __wt_log_slot_release(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, int64_t size) * was written rather than the beginning record of the slot. */ while ((cur_offset = slot->slot_last_offset) < my_start) { - WT_RET(WT_SESSION_CHECK_PANIC(session)); /* * Set our offset if we are larger. */ diff --git a/test/csuite/wt2909_checkpoint_integrity/main.c b/test/csuite/wt2909_checkpoint_integrity/main.c index 47837356401..c93da4c1068 100644 --- a/test/csuite/wt2909_checkpoint_integrity/main.c +++ b/test/csuite/wt2909_checkpoint_integrity/main.c @@ -444,6 +444,31 @@ run_process(TEST_OPTS *opts, const char *prog, char *argv[], int *status) return (0); } +/* +* subtest_error_handler -- +* Error event handler. +*/ +static int +subtest_error_handler(WT_EVENT_HANDLER *handler, + WT_SESSION *session, int error, const char *message) +{ + (void)(handler); + (void)(session); + (void)(message); + + /* Exit on panic, there's no checking to be done. 
*/ + if (error == WT_PANIC) + exit (1); + return (0); +} + +static WT_EVENT_HANDLER event_handler = { + subtest_error_handler, + NULL, /* Message handler */ + NULL, /* Progress handler */ + NULL /* Close handler */ +}; + /* * subtest_main -- * The main program for the subtest @@ -478,7 +503,8 @@ subtest_main(int argc, char *argv[], bool close_test) WT_FAIL_FS_LIB "=(early_load,config={environment=true,verbose=true})]")); - testutil_check(wiredtiger_open(opts->home, NULL, config, &opts->conn)); + testutil_check( + wiredtiger_open(opts->home, &event_handler, config, &opts->conn)); testutil_check( opts->conn->open_session(opts->conn, NULL, NULL, &session)); -- cgit v1.2.1 From f8db6cf5707ee44b7906f00add610b705d696dc6 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 19 Apr 2017 00:34:45 -0400 Subject: WT-3292 review/cleanup full-barrier calls in WiredTiger (#3395) * Rework logging's log_close_fh and log_close_lsn barrier code. There are two fields, the close handle and the close LSN, and they both have to be set. The __log_file_server() code was checking the close handle and then spinning until the close LSN was set, but a simpler solution is to publish the close LSN write before setting the close handle. To be clear, the previous code was correct, it was the comment that was wrong. * Remove barriers from around read/write of WT_TXN_GLOBAL.checkpoint_running. The read-barrier in __compact_checkpoint() isn't needed because the check is on memory declared volatile. The full-barrier in __txn_checkpoint_wrapper isn't needed because there's no ordering constraint. * Don't flush the reset of the WT_REF.state field, we've never seen any performance reason that a barrier is needed. * Fix a comment: technically, the las-was-written flag has to be flushed before any relevant read happens, document it that way. 
* Instead of explicitly flushing the clear of the WT_CONN_SERVER_LSM flag, change __wt_sleep() to imply a barrier; there are loops which don't have any other barriers, like __lsm_manager_run_server(). --- src/btree/bt_compact.c | 6 +----- src/btree/bt_sync.c | 2 +- src/cache/cache_las.c | 5 +++-- src/conn/conn_log.c | 10 ++++------ src/log/log.c | 16 +++++++--------- src/lsm/lsm_manager.c | 6 +----- src/lsm/lsm_tree.c | 4 ++-- src/os_posix/os_sleep.c | 8 ++++++++ src/os_win/os_sleep.c | 11 +++++++++-- src/session/session_compact.c | 5 ++++- src/txn/txn_ckpt.c | 5 +---- 11 files changed, 41 insertions(+), 37 deletions(-) diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c index e7edae5ea79..17308d02d91 100644 --- a/src/btree/bt_compact.c +++ b/src/btree/bt_compact.c @@ -228,12 +228,8 @@ __wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) bm, session, addr, addr_size, skipp); } - /* - * Reset the WT_REF state and push the change. The full-barrier isn't - * necessary, but it's better to keep pages in circulation than not. - */ + /* Reset the WT_REF state. */ ref->state = WT_REF_DISK; - WT_FULL_BARRIER(); return (ret); } diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index 112f0725f94..5b0bf53dc6c 100644 --- a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -179,7 +179,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) * Set the checkpointing flag to block such actions and wait for * any problematic eviction or page splits to complete.
*/ - WT_PUBLISH(btree->checkpointing, WT_CKPT_PREPARE); + btree->checkpointing = WT_CKPT_PREPARE; (void)__wt_gen_next_drain(session, WT_GEN_EVICT); btree->checkpointing = WT_CKPT_RUNNING; diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c index 9e8545453d3..06c6354148c 100644 --- a/src/cache/cache_las.c +++ b/src/cache/cache_las.c @@ -140,8 +140,9 @@ __wt_las_set_written(WT_SESSION_IMPL *session) conn->las_written = true; /* - * Push the flag: unnecessary, but from now page reads must deal - * with lookaside table records, and we only do the write once. + * Future page reads must deal with lookaside table records. + * No write could be cached until a future read might matter, + * the barrier is more documentation than requirement. */ WT_FULL_BARRIER(); } diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index 47ba4d45dc3..08b572244af 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -391,13 +391,11 @@ __log_file_server(void *arg) WT_ERR(__wt_log_extract_lognum(session, close_fh->name, &filenum)); /* - * We update the close file handle before updating the - * close LSN when changing files. It is possible we - * could see mismatched settings. If we do, yield - * until it is set. This should rarely happen. + * The closing file handle should have a correct close + * LSN. */ - while (log->log_close_lsn.l.file < filenum) - __wt_yield(); + WT_ASSERT(session, + log->log_close_lsn.l.file == filenum); if (__wt_log_cmp( &log->write_lsn, &log->log_close_lsn) >= 0) { diff --git a/src/log/log.c b/src/log/log.c index f95ad93d872..3c37e1eb326 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -876,18 +876,16 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) __wt_yield(); } /* - * Note, the file server worker thread has code that knows that - * the file handle is set before the LSN. Do not reorder without - * also reviewing that code. 
+ * Note, the file server worker thread requires the LSN be set once the + * close file handle is set, force that ordering. */ - log->log_close_fh = log->log_fh; - if (log->log_close_fh != NULL) + if (log->log_fh == NULL) + log->log_close_fh = NULL; + else { log->log_close_lsn = log->alloc_lsn; + WT_PUBLISH(log->log_close_fh, log->log_fh); + } log->fileid++; - /* - * Make sure everything we set above is visible. - */ - WT_FULL_BARRIER(); /* * If pre-allocating log files look for one; otherwise, or if we don't diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 88b3e1980be..f391c553d2a 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -284,12 +284,8 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) manager = &conn->lsm_manager; removed = 0; - /* - * Clear the LSM server flag and flush to ensure running threads see - * the state change. - */ + /* Clear the LSM server flag. */ F_CLR(conn, WT_CONN_SERVER_LSM); - WT_FULL_BARRIER(); WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) || manager->lsm_workers == 0); diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index fb8eb9d38a7..fe36237969f 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -768,13 +768,13 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_ERR(__wt_lsm_meta_write(session, lsm_tree, NULL)); lsm_tree->need_switch = false; - ++lsm_tree->dsk_gen; - lsm_tree->modified = true; + /* * Ensure the updated disk generation is visible to all other threads * before updating the transaction ID. */ + ++lsm_tree->dsk_gen; WT_FULL_BARRIER(); /* diff --git a/src/os_posix/os_sleep.c b/src/os_posix/os_sleep.c index a0545d3f5fe..67c0aaa375c 100644 --- a/src/os_posix/os_sleep.c +++ b/src/os_posix/os_sleep.c @@ -18,6 +18,14 @@ __wt_sleep(uint64_t seconds, uint64_t micro_seconds) { struct timeval t; + /* + * Sleeping isn't documented as a memory barrier, and it's a reasonable + * expectation to have. 
There's no reason not to explicitly include a + * barrier since we're giving up the CPU, and ensures callers are never + * surprised. + */ + WT_FULL_BARRIER(); + t.tv_sec = (time_t)(seconds + micro_seconds / WT_MILLION); t.tv_usec = (suseconds_t)(micro_seconds % WT_MILLION); diff --git a/src/os_win/os_sleep.c b/src/os_win/os_sleep.c index 4b6bdaea0be..477474e0665 100644 --- a/src/os_win/os_sleep.c +++ b/src/os_win/os_sleep.c @@ -18,8 +18,15 @@ __wt_sleep(uint64_t seconds, uint64_t micro_seconds) DWORD dwMilliseconds; /* - * If the caller wants a small pause, set to our - * smallest granularity. + * Sleeping isn't documented as a memory barrier, and it's a reasonable + * expectation to have. There's no reason not to explicitly include a + * barrier since we're giving up the CPU, and ensures callers are never + * surprised. + */ + WT_FULL_BARRIER(); + + /* + * If the caller wants a small pause, set to our smallest granularity. */ if (seconds == 0 && micro_seconds < WT_THOUSAND) micro_seconds = WT_THOUSAND; diff --git a/src/session/session_compact.c b/src/session/session_compact.c index 30c6ad297f7..c4710dbb1a5 100644 --- a/src/session/session_compact.c +++ b/src/session/session_compact.c @@ -226,7 +226,10 @@ __compact_checkpoint(WT_SESSION_IMPL *session) */ txn_global = &S2C(session)->txn_global; for (txn_gen = __wt_gen(session, WT_GEN_CHECKPOINT);;) { - WT_READ_BARRIER(); + /* + * This loop only checks objects that are declared volatile, + * therefore no barriers are needed. 
+ */ if (!txn_global->checkpoint_running || txn_gen != __wt_gen(session, WT_GEN_CHECKPOINT)) break; diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 92dfd9e3887..82163f471b8 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -943,13 +943,11 @@ __txn_checkpoint_wrapper(WT_SESSION_IMPL *session, const char *cfg[]) WT_STAT_CONN_SET(session, txn_checkpoint_running, 1); txn_global->checkpoint_running = true; - WT_FULL_BARRIER(); ret = __txn_checkpoint(session, cfg); WT_STAT_CONN_SET(session, txn_checkpoint_running, 0); txn_global->checkpoint_running = false; - WT_FULL_BARRIER(); return (ret); } @@ -1446,8 +1444,7 @@ __checkpoint_tree( * the checkpoint start, which might not be included, will re-set the * modified flag. The "unless reconciliation skips updates" problem is * handled in the reconciliation code: if reconciliation skips updates, - * it sets the modified flag itself. Use a full barrier so we get the - * store done quickly, this isn't a performance path. + * it sets the modified flag itself. */ btree->modified = false; WT_FULL_BARRIER(); -- cgit v1.2.1 From f8245eb29c38b494e37d1e587ed0774e86639421 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 19 Apr 2017 00:38:41 -0400 Subject: WT-3281 stress test sanitizer failure (#3387) There's underlying code that copies internal values out of pinned pages and into external buffers AFTER a search has potentially evicted the page. Change the cursor operations to copy or discard values which reference pinned pages before doing a search, the same way we handle keys. 
--- dist/s_string.ok | 2 + src/btree/bt_cursor.c | 120 +++++++++++++++++++++++----------------------- src/include/cursor.i | 50 +++++++++++++------ src/session/session_api.c | 6 +-- 4 files changed, 97 insertions(+), 81 deletions(-) diff --git a/dist/s_string.ok b/dist/s_string.ok index 1285d70897e..7f8234d007a 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -866,7 +866,9 @@ llll llu loadtext localTime +localkey localtime +localvalue logf logmgr lognum diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 41701cc0a73..664545ee3a0 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -63,29 +63,6 @@ __cursor_page_pinned(WT_CURSOR_BTREE *cbt) cbt->ref->page->read_gen != WT_READGEN_OLDEST); } -/* - * __cursor_copy_int_key -- - * If we're pointing into the tree, save the key into local memory. - */ -static inline int -__cursor_copy_int_key(WT_CURSOR *cursor) -{ - /* - * We're about to discard the cursor's position and the cursor layer - * might retry the operation. We discard pinned pages on error, which - * will invalidate pinned keys. Clear WT_CURSTD_KEY_INT in all cases, - * the underlying page is gone whether we can allocate memory or not. - */ - if (F_ISSET(cursor, WT_CURSTD_KEY_INT)) { - F_CLR(cursor, WT_CURSTD_KEY_INT); - if (!WT_DATA_IN_ITEM(&cursor->key)) - WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session, - &cursor->key, cursor->key.data, cursor->key.size)); - F_SET(cursor, WT_CURSTD_KEY_EXT); - } - return (0); -} - /* * __cursor_size_chk -- * Return if an inserted item is too large. @@ -431,10 +408,14 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) __cursor_state_save(cursor, &state); /* - * The pinned page goes away if we do a search, make sure there's a - * local copy of any key, then re-save the cursor state. + * The pinned page goes away if we search the tree, get a local copy of + * any pinned key and discard any pinned value, then re-save the cursor + * state. 
Done before searching pinned pages (unlike other cursor + * functions), because we don't anticipate applications searching for a + * key they currently have pinned. */ - WT_ERR(__cursor_copy_int_key(cursor)); + WT_ERR(__cursor_localkey(cursor)); + __cursor_novalue(cursor); __cursor_state_save(cursor, &state); /* @@ -516,10 +497,14 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) __cursor_state_save(cursor, &state); /* - * The pinned page goes away if we do a search, make sure there's a - * local copy of any key, then re-save the cursor state. + * The pinned page goes away if we search the tree, get a local copy of + * any pinned key and discard any pinned value, then re-save the cursor + * state. Done before searching pinned pages (unlike other cursor + * functions), because we don't anticipate applications searching for a + * key they currently have pinned. */ - WT_ERR(__cursor_copy_int_key(cursor)); + WT_ERR(__cursor_localkey(cursor)); + __cursor_novalue(cursor); __cursor_state_save(cursor, &state); /* @@ -640,8 +625,6 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_STAT_DATA_INCRV(session, cursor_insert_bytes, cursor->key.size + cursor->value.size); - __cursor_state_save(cursor, &state); - if (btree->type == BTREE_ROW) WT_RET(__cursor_size_chk(session, &cursor->key)); WT_RET(__cursor_size_chk(session, &cursor->value)); @@ -658,6 +641,9 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) append_key = F_ISSET(cursor, WT_CURSTD_APPEND) && btree->type != BTREE_ROW; + /* Save the cursor state. */ + __cursor_state_save(cursor, &state); + /* * If inserting with overwrite configured, and positioned to an on-page * key, the update doesn't require another search. The cursor won't be @@ -682,22 +668,24 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) goto done; /* - * The pinned page goes away if we fail for any reason, make - * sure there's a local copy of any key.
(Restart could still + * The pinned page goes away if we fail for any reason, get a + * local copy of any pinned key or value. (Restart could still * use the pinned page, but that's an unlikely path.) Re-save * the cursor state: we may retry but eventually fail. */ - WT_TRET(__cursor_copy_int_key(cursor)); + WT_TRET(__cursor_localkey(cursor)); + WT_TRET(__cursor_localvalue(cursor)); __cursor_state_save(cursor, &state); goto err; } /* - * The pinned page goes away if we do a search, make sure there's a - * local copy of any key. Re-save the cursor state: we may retry but + * The pinned page goes away if we do a search, get a local copy of any + * pinned key or value. Re-save the cursor state: we may retry but * eventually fail. */ - WT_ERR(__cursor_copy_int_key(cursor)); + WT_ERR(__cursor_localkey(cursor)); + WT_ERR(__cursor_localvalue(cursor)); __cursor_state_save(cursor, &state); retry: WT_ERR(__cursor_func_init(cbt, true)); @@ -812,12 +800,13 @@ __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) session = (WT_SESSION_IMPL *)cursor->session; /* - * The pinned page goes away if we do a search, make sure there's a - * local copy of any key. Unlike most of the btree cursor routines, - * we don't have to save/restore the cursor key state, none of the - * work done here changes the key state. + * The pinned page goes away if we do a search, get a local copy of any + * pinned key and discard any pinned value. Unlike most of the btree + * cursor routines, we don't have to save/restore the cursor key state, + * none of the work done here changes the cursor state. 
*/ - WT_ERR(__cursor_copy_int_key(cursor)); + WT_ERR(__cursor_localkey(cursor)); + __cursor_novalue(cursor); retry: WT_ERR(__cursor_func_init(cbt, true)); @@ -865,14 +854,15 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt) WT_STAT_DATA_INCR(session, cursor_remove); WT_STAT_DATA_INCRV(session, cursor_remove_bytes, cursor->key.size); - __cursor_state_save(cursor, &state); - /* * WT_CURSOR.remove has a unique semantic, the cursor stays positioned * if it starts positioned, otherwise clear the cursor on completion. */ positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT); + /* Save the cursor state. */ + __cursor_state_save(cursor, &state); + /* * If remove positioned to an on-page key, the remove doesn't require * another search. We don't care about the "overwrite" configuration @@ -897,22 +887,27 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt) goto done; /* - * The pinned page goes away if we fail for any reason, make - * sure there's a local copy of any key. (Restart could still - * use the pinned page, but that's an unlikely path.) Re-save - * the cursor state: we may retry but eventually fail. + * The pinned page goes away if we fail for any reason, get a + * local copy of any pinned key and discard any value (remove + * discards any previous value on success or failure). (Restart + * could still use the pinned page, but that's an unlikely + * path.) Re-save the cursor state: we may retry but eventually + * fail. */ - WT_TRET(__cursor_copy_int_key(cursor)); + WT_TRET(__cursor_localkey(cursor)); + F_CLR(cursor, WT_CURSTD_VALUE_SET); __cursor_state_save(cursor, &state); goto err; } /* - * The pinned page goes away if we do a search, make sure there's a - * local copy of any key. Re-save the cursor state: we may retry but - * eventually fail. + * The pinned page goes away if we do a search, get a local copy of any + * pinned key and discard any value (remove discards any previous + * value on success or failure). Re-save the cursor state: we may retry + * but eventually fail. 
*/ - WT_ERR(__cursor_copy_int_key(cursor)); + WT_ERR(__cursor_localkey(cursor)); + F_CLR(cursor, WT_CURSTD_VALUE_SET); __cursor_state_save(cursor, &state); retry: WT_ERR(__cursor_func_init(cbt, true)); @@ -1007,8 +1002,6 @@ __btcur_update(WT_CURSOR_BTREE *cbt, bool is_reserve) WT_STAT_DATA_INCR(session, cursor_update); WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->value.size); - __cursor_state_save(cursor, &state); - if (btree->type == BTREE_ROW) WT_RET(__cursor_size_chk(session, &cursor->key)); if (!is_reserve) @@ -1017,6 +1010,9 @@ __btcur_update(WT_CURSOR_BTREE *cbt, bool is_reserve) /* It's no longer possible to bulk-load into the tree. */ __cursor_disable_bulk(session, btree); + /* Save the cursor state. */ + __cursor_state_save(cursor, &state); + /* * If update positioned to an on-page key, the update doesn't require * another search. We don't care about the "overwrite" configuration @@ -1040,22 +1036,24 @@ __btcur_update(WT_CURSOR_BTREE *cbt, bool is_reserve) goto done; /* - * The pinned page goes away if we fail for any reason, make - * sure there's a local copy of any key. (Restart could still + * The pinned page goes away if we fail for any reason, get a + * local copy of any pinned key or value. (Restart could still * use the pinned page, but that's an unlikely path.) Re-save * the cursor state: we may retry but eventually fail. */ - WT_TRET(__cursor_copy_int_key(cursor)); + WT_TRET(__cursor_localkey(cursor)); + WT_TRET(__cursor_localvalue(cursor)); __cursor_state_save(cursor, &state); goto err; } /* - * The pinned page goes away if we do a search, make sure there's a - * local copy of any key. Re-save the cursor state: we may retry but + * The pinned page goes away if we do a search, get a local copy of any + * pinned key or value. Re-save the cursor state: we may retry but * eventually fail.
*/ - WT_ERR(__cursor_copy_int_key(cursor)); + WT_ERR(__cursor_localkey(cursor)); + WT_ERR(__cursor_localvalue(cursor)); __cursor_state_save(cursor, &state); retry: WT_ERR(__cursor_func_init(cbt, true)); diff --git a/src/include/cursor.i b/src/include/cursor.i index 4b6c5e74320..75fd935fc91 100644 --- a/src/include/cursor.i +++ b/src/include/cursor.i @@ -51,14 +51,11 @@ __cursor_checkvalue(WT_CURSOR *cursor) } /* - * __cursor_needkey -- - * - * Check if we have a key set. There's an additional semantic here: if we're - * pointing into the tree, get a local copy of whatever we're referencing in - * the tree, there's an obvious race with the cursor moving and the reference. + * __cursor_localkey -- + * If the key points into the tree, get a local copy. */ static inline int -__cursor_needkey(WT_CURSOR *cursor) +__cursor_localkey(WT_CURSOR *cursor) { if (F_ISSET(cursor, WT_CURSTD_KEY_INT)) { if (!WT_DATA_IN_ITEM(&cursor->key)) @@ -66,20 +63,16 @@ __cursor_needkey(WT_CURSOR *cursor) &cursor->key, cursor->key.data, cursor->key.size)); F_CLR(cursor, WT_CURSTD_KEY_INT); F_SET(cursor, WT_CURSTD_KEY_EXT); - return (0); } - return (__cursor_checkkey(cursor)); + return (0); } /* - * __cursor_needvalue -- - * - * Check if we have a value set. There's an additional semantic here: if we're - * pointing into the tree, get a local copy of whatever we're referencing in - * the tree, there's an obvious race with the cursor moving and the reference. + * __cursor_localvalue -- + * If the value points into the tree, get a local copy. */ static inline int -__cursor_needvalue(WT_CURSOR *cursor) +__cursor_localvalue(WT_CURSOR *cursor) { if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) { if (!WT_DATA_IN_ITEM(&cursor->value)) @@ -88,8 +81,35 @@ __cursor_needvalue(WT_CURSOR *cursor) cursor->value.data, cursor->value.size)); F_CLR(cursor, WT_CURSTD_VALUE_INT); F_SET(cursor, WT_CURSTD_VALUE_EXT); - return (0); } + return (0); +} + +/* + * __cursor_needkey -- + * + * Check if we have a key set. 
There's an additional semantic here: if we're + * pointing into the tree, get a local copy of whatever we're referencing in + * the tree, there's an obvious race with the cursor moving and the reference. + */ +static inline int +__cursor_needkey(WT_CURSOR *cursor) +{ + WT_RET(__cursor_localkey(cursor)); + return (__cursor_checkkey(cursor)); +} + +/* + * __cursor_needvalue -- + * + * Check if we have a value set. There's an additional semantic here: if we're + * pointing into the tree, get a local copy of whatever we're referencing in + * the tree, there's an obvious race with the cursor moving and the reference. + */ +static inline int +__cursor_needvalue(WT_CURSOR *cursor) +{ + WT_RET(__cursor_localvalue(cursor)); return (__cursor_checkvalue(cursor)); } diff --git a/src/session/session_api.c b/src/session/session_api.c index 3c01dadc48c..d7fdc12fc3c 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -72,11 +72,7 @@ __wt_session_copy_values(WT_SESSION_IMPL *session) (WT_PREFIX_MATCH(cursor->uri, "file:") && F_ISSET((WT_CURSOR_BTREE *)cursor, WT_CBT_NO_TXN))); #endif - - F_CLR(cursor, WT_CURSTD_VALUE_INT); - WT_RET(__wt_buf_set(session, &cursor->value, - cursor->value.data, cursor->value.size)); - F_SET(cursor, WT_CURSTD_VALUE_EXT); + WT_RET(__cursor_localvalue(cursor)); } return (0); -- cgit v1.2.1 From dff28119fc6dec90eae1de08ee8a8d76cf1c1be6 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 19 Apr 2017 02:49:39 -0400 Subject: WT-3288 fix error codes for event_handler to be consistent in file operations (#3393) `__wt_formatmessage()` takes a raw Windows error, `__wt_err()` takes the mapped Windows error and we want to return the mapped Windows error. Use a new pattern: `windows_error` is the raw Windows error, `ret` is the mapped Windows error.
--- src/os_win/os_fs.c | 93 ++++++++++++++++++++++++++++++++---------------------- src/support/err.c | 2 ++ 2 files changed, 57 insertions(+), 38 deletions(-) diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c index 6cbb89ba37d..1410a7bad03 100644 --- a/src/os_win/os_fs.c +++ b/src/os_win/os_fs.c @@ -55,10 +55,11 @@ __win_fs_remove(WT_FILE_SYSTEM *file_system, if (DeleteFileW(name_wide->data) == FALSE) { windows_error = __wt_getlasterror(); - __wt_errx(session, + ret = __wt_map_windows_error(windows_error); + __wt_err(session, ret, "%s: file-remove: DeleteFileW: %s", name, __wt_formatmessage(session, windows_error)); - WT_ERR(__wt_map_windows_error(windows_error)); + WT_ERR(ret); } err: __wt_scr_free(session, &name_wide); @@ -74,9 +75,9 @@ __win_fs_rename(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags) { DWORD windows_error; - WT_DECL_RET; WT_DECL_ITEM(from_wide); WT_DECL_ITEM(to_wide); + WT_DECL_RET; WT_SESSION_IMPL *session; WT_UNUSED(file_system); @@ -98,10 +99,11 @@ __win_fs_rename(WT_FILE_SYSTEM *file_system, if (MoveFileExW(from_wide->data, to_wide->data, MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH) == FALSE) { windows_error = __wt_getlasterror(); - __wt_errx(session, + ret = __wt_map_windows_error(windows_error); + __wt_err(session, ret, "%s to %s: file-rename: MoveFileExW: %s", from, to, __wt_formatmessage(session, windows_error)); - WT_ERR(__wt_map_windows_error(windows_error)); + WT_ERR(ret); } err: __wt_scr_free(session, &from_wide); @@ -118,9 +120,9 @@ __wt_win_fs_size(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, wt_off_t *sizep) { DWORD windows_error; - WT_DECL_RET; WIN32_FILE_ATTRIBUTE_DATA data; WT_DECL_ITEM(name_wide); + WT_DECL_RET; WT_SESSION_IMPL *session; WT_UNUSED(file_system); @@ -131,10 +133,11 @@ __wt_win_fs_size(WT_FILE_SYSTEM *file_system, if (GetFileAttributesExW( name_wide->data, GetFileExInfoStandard, &data) == 0) { windows_error = 
__wt_getlasterror(); - __wt_errx(session, + ret = __wt_map_windows_error(windows_error); + __wt_err(session, ret, "%s: file-size: GetFileAttributesEx: %s", name, __wt_formatmessage(session, windows_error)); - WT_ERR(__wt_map_windows_error(windows_error)); + WT_ERR(ret); } *sizep = ((int64_t)data.nFileSizeHigh << 32) | data.nFileSizeLow; @@ -168,21 +171,21 @@ __win_file_close(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session) if (win_fh->filehandle != INVALID_HANDLE_VALUE && CloseHandle(win_fh->filehandle) == 0) { windows_error = __wt_getlasterror(); - __wt_errx(session, + ret = __wt_map_windows_error(windows_error); + __wt_err(session, ret, "%s: handle-close: CloseHandle: %s", file_handle->name, __wt_formatmessage(session, windows_error)); - ret = __wt_map_windows_error(windows_error); } if (win_fh->filehandle_secondary != INVALID_HANDLE_VALUE && CloseHandle(win_fh->filehandle_secondary) == 0) { windows_error = __wt_getlasterror(); - __wt_errx(session, + ret = __wt_map_windows_error(windows_error); + __wt_err(session, ret, "%s: handle-close: secondary: CloseHandle: %s", file_handle->name, __wt_formatmessage(session, windows_error)); - ret = __wt_map_windows_error(windows_error); } __wt_free(session, file_handle->name); @@ -199,6 +202,7 @@ __win_file_lock( WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, bool lock) { DWORD windows_error; + WT_DECL_RET; WT_FILE_HANDLE_WIN *win_fh; WT_SESSION_IMPL *session; @@ -218,22 +222,22 @@ __win_file_lock( if (lock) { if (LockFile(win_fh->filehandle, 0, 0, 1, 0) == FALSE) { windows_error = __wt_getlasterror(); - __wt_errx(session, + ret = __wt_map_windows_error(windows_error); + __wt_err(session, ret, "%s: handle-lock: LockFile: %s", file_handle->name, __wt_formatmessage(session, windows_error)); - return (__wt_map_windows_error(windows_error)); } } else if (UnlockFile(win_fh->filehandle, 0, 0, 1, 0) == FALSE) { windows_error = __wt_getlasterror(); - __wt_errx(session, + ret = __wt_map_windows_error(windows_error); + 
__wt_err(session, ret, "%s: handle-lock: UnlockFile: %s", file_handle->name, __wt_formatmessage(session, windows_error)); - return (__wt_map_windows_error(windows_error)); } - return (0); + return (ret); } /* @@ -245,10 +249,11 @@ __win_file_read(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset, size_t len, void *buf) { DWORD chunk, nr, windows_error; - uint8_t *addr; OVERLAPPED overlapped = { 0 }; + WT_DECL_RET; WT_FILE_HANDLE_WIN *win_fh; WT_SESSION_IMPL *session; + uint8_t *addr; win_fh = (WT_FILE_HANDLE_WIN *)file_handle; session = (WT_SESSION_IMPL *)wt_session; @@ -273,12 +278,13 @@ __win_file_read(WT_FILE_HANDLE *file_handle, if (!ReadFile( win_fh->filehandle, addr, chunk, &nr, &overlapped)) { windows_error = __wt_getlasterror(); - __wt_errx(session, + ret = __wt_map_windows_error(windows_error); + __wt_err(session, ret, "%s: handle-read: ReadFile: failed to read %lu " "bytes at offset %" PRIuMAX ": %s", file_handle->name, chunk, (uintmax_t)offset, __wt_formatmessage(session, windows_error)); - return (__wt_map_windows_error(windows_error)); + return (ret); } } return (0); @@ -293,9 +299,10 @@ __win_file_size( WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t *sizep) { DWORD windows_error; + LARGE_INTEGER size; + WT_DECL_RET; WT_FILE_HANDLE_WIN *win_fh; WT_SESSION_IMPL *session; - LARGE_INTEGER size; win_fh = (WT_FILE_HANDLE_WIN *)file_handle; session = (WT_SESSION_IMPL *)wt_session; @@ -306,10 +313,11 @@ __win_file_size( } windows_error = __wt_getlasterror(); - __wt_errx(session, + ret = __wt_map_windows_error(windows_error); + __wt_err(session, ret, "%s: handle-size: GetFileSizeEx: %s", file_handle->name, __wt_formatmessage(session, windows_error)); - return (__wt_map_windows_error(windows_error)); + return (ret); } /* @@ -320,6 +328,7 @@ static int __win_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session) { DWORD windows_error; + WT_DECL_RET; WT_FILE_HANDLE_WIN *win_fh; WT_SESSION_IMPL *session; @@ -337,11 +346,12 
@@ __win_file_sync(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session) if (FlushFileBuffers(win_fh->filehandle) == FALSE) { windows_error = __wt_getlasterror(); - __wt_errx(session, + ret = __wt_map_windows_error(windows_error); + __wt_err(session, ret, "%s handle-sync: FlushFileBuffers: %s", file_handle->name, __wt_formatmessage(session, windows_error)); - return (__wt_map_windows_error(windows_error)); + return (ret); } return (0); } @@ -355,9 +365,10 @@ __win_file_set_end( WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t len) { DWORD windows_error; + LARGE_INTEGER largeint; + WT_DECL_RET; WT_FILE_HANDLE_WIN *win_fh; WT_SESSION_IMPL *session; - LARGE_INTEGER largeint; win_fh = (WT_FILE_HANDLE_WIN *)file_handle; session = (WT_SESSION_IMPL *)wt_session; @@ -372,22 +383,24 @@ __win_file_set_end( if (SetFilePointerEx(win_fh->filehandle_secondary, largeint, NULL, FILE_BEGIN) == FALSE) { windows_error = __wt_getlasterror(); - __wt_errx(session, + ret = __wt_map_windows_error(windows_error); + __wt_err(session, ret, "%s: handle-set-end: SetFilePointerEx: %s", file_handle->name, __wt_formatmessage(session, windows_error)); - return (__wt_map_windows_error(windows_error)); + return (ret); } if (SetEndOfFile(win_fh->filehandle_secondary) == FALSE) { if (GetLastError() == ERROR_USER_MAPPED_FILE) return (EBUSY); windows_error = __wt_getlasterror(); - __wt_errx(session, + ret = __wt_map_windows_error(windows_error); + __wt_err(session, ret, "%s: handle-set-end: SetEndOfFile: %s", file_handle->name, __wt_formatmessage(session, windows_error)); - return (__wt_map_windows_error(windows_error)); + return (ret); } return (0); } @@ -401,10 +414,11 @@ __win_file_write(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset, size_t len, const void *buf) { DWORD chunk, nw, windows_error; - const uint8_t *addr; OVERLAPPED overlapped = { 0 }; + WT_DECL_RET; WT_FILE_HANDLE_WIN *win_fh; WT_SESSION_IMPL *session; + const uint8_t *addr; win_fh = 
(WT_FILE_HANDLE_WIN *)file_handle; session = (WT_SESSION_IMPL *)wt_session; @@ -429,12 +443,13 @@ __win_file_write(WT_FILE_HANDLE *file_handle, if (!WriteFile( win_fh->filehandle, addr, chunk, &nw, &overlapped)) { windows_error = __wt_getlasterror(); - __wt_errx(session, + ret = __wt_map_windows_error(windows_error); + __wt_err(session, ret, "%s: handle-write: WriteFile: failed to write %lu " "bytes at offset %" PRIuMAX ": %s", file_handle->name, chunk, (uintmax_t)offset, __wt_formatmessage(session, windows_error)); - return (__wt_map_windows_error(windows_error)); + return (ret); } } return (0); @@ -451,8 +466,8 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, { DWORD dwCreationDisposition, windows_error; WT_CONNECTION_IMPL *conn; - WT_DECL_RET; WT_DECL_ITEM(name_wide); + WT_DECL_RET; WT_FILE_HANDLE *file_handle; WT_FILE_HANDLE_WIN *win_fh; WT_SESSION_IMPL *session; @@ -538,14 +553,15 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, NULL, OPEN_EXISTING, f, NULL); if (win_fh->filehandle == INVALID_HANDLE_VALUE) { windows_error = __wt_getlasterror(); - __wt_errx(session, + ret = __wt_map_windows_error(windows_error); + __wt_err(session, ret, win_fh->direct_io ? 
"%s: handle-open: CreateFileW: failed with direct " "I/O configured, some filesystem types do not " "support direct I/O: %s" : "%s: handle-open: CreateFileW: %s", name, __wt_formatmessage(session, windows_error)); - WT_ERR(__wt_map_windows_error(windows_error)); + WT_ERR(ret); } } @@ -560,10 +576,11 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, NULL, OPEN_EXISTING, f, NULL); if (win_fh->filehandle_secondary == INVALID_HANDLE_VALUE) { windows_error = __wt_getlasterror(); - __wt_errx(session, + ret = __wt_map_windows_error(windows_error); + __wt_err(session, ret, "%s: handle-open: Creatively: secondary: %s", name, __wt_formatmessage(session, windows_error)); - WT_ERR(__wt_map_windows_error(windows_error)); + WT_ERR(ret); } } diff --git a/src/support/err.c b/src/support/err.c index a91f994b445..7f6c835ab29 100644 --- a/src/support/err.c +++ b/src/support/err.c @@ -144,6 +144,8 @@ __wt_event_handler_set(WT_SESSION_IMPL *session, WT_EVENT_HANDLER *handler) handler->handle_message = __handle_message_default; if (handler->handle_progress == NULL) handler->handle_progress = __handle_progress_default; + if (handler->handle_close == NULL) + handler->handle_close = __handle_close_default; } session->event_handler = handler; -- cgit v1.2.1 From 20580c7792d616136bbb25ad00b0842ac278347a Mon Sep 17 00:00:00 2001 From: sueloverso Date: Wed, 19 Apr 2017 16:37:19 -0400 Subject: WT-3263 Allow recovery log file archiving on clean shutdown. 
(#3384) --- src/include/connection.h | 7 ++++--- src/txn/txn_log.c | 20 +++++++++++--------- src/txn/txn_recover.c | 3 +++ test/recovery/random-abort.c | 4 ++-- test/suite/test_txn02.py | 1 - test/suite/test_txn05.py | 11 ++++++++--- 6 files changed, 28 insertions(+), 18 deletions(-) diff --git a/src/include/connection.h b/src/include/connection.h index b75e2fe1e58..6f656270f38 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -325,9 +325,10 @@ struct __wt_connection_impl { #define WT_CONN_LOG_ARCHIVE 0x01 /* Archive is enabled */ #define WT_CONN_LOG_ENABLED 0x02 /* Logging is enabled */ #define WT_CONN_LOG_EXISTED 0x04 /* Log files found */ -#define WT_CONN_LOG_RECOVER_DONE 0x08 /* Recovery completed */ -#define WT_CONN_LOG_RECOVER_ERR 0x10 /* Error if recovery required */ -#define WT_CONN_LOG_ZERO_FILL 0x20 /* Manually zero files */ +#define WT_CONN_LOG_RECOVER_DIRTY 0x08 /* Recovering unclean */ +#define WT_CONN_LOG_RECOVER_DONE 0x10 /* Recovery completed */ +#define WT_CONN_LOG_RECOVER_ERR 0x20 /* Error if recovery required */ +#define WT_CONN_LOG_ZERO_FILL 0x40 /* Manually zero files */ uint32_t log_flags; /* Global logging configuration */ WT_CONDVAR *log_cond; /* Log server wait mutex */ WT_SESSION_IMPL *log_session; /* Log server session */ diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c index 67de19abe4f..fae2027e1ec 100644 --- a/src/txn/txn_log.c +++ b/src/txn/txn_log.c @@ -334,6 +334,7 @@ int __wt_txn_checkpoint_log( WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp) { + WT_CONNECTION_IMPL *conn; WT_DECL_ITEM(logrec); WT_DECL_RET; WT_ITEM *ckpt_snapshot, empty; @@ -344,6 +345,7 @@ __wt_txn_checkpoint_log( uint32_t i, rectype = WT_LOGREC_CHECKPOINT; const char *fmt = WT_UNCHECKED_STRING(IIIIu); + conn = S2C(session); txn = &session->txn; ckpt_lsn = &txn->ckpt_lsn; @@ -408,20 +410,20 @@ __wt_txn_checkpoint_log( txn->ckpt_nsnapshot, ckpt_snapshot)); logrec->size += (uint32_t)recsize; WT_ERR(__wt_log_write(session, 
logrec, lsnp, - F_ISSET(S2C(session), WT_CONN_CKPT_SYNC) ? + F_ISSET(conn, WT_CONN_CKPT_SYNC) ? WT_LOG_FSYNC : 0)); /* * If this full checkpoint completed successfully and there is - * no hot backup in progress and this is not recovery, tell - * the logging subsystem the checkpoint LSN so that it can - * archive. Do not update the logging checkpoint LSN if this - * is during a clean connection close, only during a full - * checkpoint. A clean close may not update any metadata LSN - * and we do not want to archive in that case. + * no hot backup in progress and this is not an unclean + * recovery, tell the logging subsystem the checkpoint LSN so + * that it can archive. Do not update the logging checkpoint + * LSN if this is during a clean connection close, only during + * a full checkpoint. A clean close may not update any + * metadata LSN and we do not want to archive in that case. */ - if (!S2C(session)->hot_backup && - !F_ISSET(S2C(session), WT_CONN_RECOVERING) && + if (!conn->hot_backup && + !FLD_ISSET(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY) && txn->full_ckpt) __wt_log_ckpt(session, ckpt_lsn); diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c index 93f5fa5d15e..97e8e98f8e0 100644 --- a/src/txn/txn_recover.c +++ b/src/txn/txn_recover.c @@ -535,6 +535,8 @@ __wt_txn_recover(WT_SESSION_IMPL *session) * this is not a read-only connection. * We can consider skipping it in the future. 
*/ + if (needs_rec) + FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY); if (WT_IS_INIT_LSN(&r.ckpt_lsn)) WT_ERR(__wt_log_scan(session, NULL, WT_LOGSCAN_FIRST | WT_LOGSCAN_RECOVER, @@ -559,6 +561,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session) done: FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE); err: WT_TRET(__recovery_free(&r)); __wt_free(session, config); + FLD_CLR(conn->log_flags, WT_CONN_LOG_RECOVER_DIRTY); if (ret != 0) __wt_err(session, ret, "Recovery failed"); diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c index 1a759590871..12f86d664ef 100644 --- a/test/recovery/random-abort.c +++ b/test/recovery/random-abort.c @@ -47,9 +47,9 @@ static bool inmem; #define RECORDS_FILE "records-%" PRIu32 #define ENV_CONFIG_DEF \ - "create,log=(file_max=10M,archive=false,enabled)" + "create,log=(file_max=10M,enabled)" #define ENV_CONFIG_TXNSYNC \ - "create,log=(file_max=10M,archive=false,enabled)," \ + "create,log=(file_max=10M,enabled)," \ "transaction_sync=(enabled,method=none)" #define ENV_CONFIG_REC "log=(recover=on)" #define MAX_VAL 4096 diff --git a/test/suite/test_txn02.py b/test/suite/test_txn02.py index f51d69d4a6c..b61a9ed9f99 100644 --- a/test/suite/test_txn02.py +++ b/test/suite/test_txn02.py @@ -172,7 +172,6 @@ class test_txn02(wttest.WiredTigerTestCase, suite_subprocess): try: session = backup_conn.open_session() finally: - session.checkpoint("force") self.check(backup_conn.open_session(), None, committed) # Sleep long enough so that the archive thread is guaranteed # to run before we close the connection. 
diff --git a/test/suite/test_txn05.py b/test/suite/test_txn05.py index 69bb9611262..6a5be0a5df4 100644 --- a/test/suite/test_txn05.py +++ b/test/suite/test_txn05.py @@ -137,12 +137,12 @@ class test_txn05(wttest.WiredTigerTestCase, suite_subprocess): session = backup_conn.open_session() finally: self.check(session, None, committed) - # Force a checkpoint because we don't record the recovery - # checkpoint as available for archiving. - session.checkpoint("force") # Sleep long enough so that the archive thread is guaranteed # to run before we close the connection. time.sleep(1.0) + if count == 0: + first_logs = \ + fnmatch.filter(os.listdir(self.backup_dir), "*Log*") backup_conn.close() count += 1 # @@ -152,6 +152,11 @@ class test_txn05(wttest.WiredTigerTestCase, suite_subprocess): # cur_logs = fnmatch.filter(os.listdir(self.backup_dir), "*Log*") for o in orig_logs: + # Creating the backup was effectively an unclean shutdown so + # even after sleeping, we should never archive log files + # because a checkpoint has not run. Later opens and runs of + # recovery will detect a clean shutdown and allow archiving. + self.assertEqual(True, o in first_logs) if self.archive == 'true': self.assertEqual(False, o in cur_logs) else: -- cgit v1.2.1 From 26a3c01c8a2f2a6f4112f8b7b86b62b94e1cbff2 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Thu, 20 Apr 2017 17:39:58 -0400 Subject: WT-3282 Split cache flags and cache_pool flags. (#3396) * WT-3282 Split cache flags and cache_pool flags. * KNF --- src/conn/conn_cache_pool.c | 17 +++++++++-------- src/evict/evict_lru.c | 5 ++--- src/include/cache.h | 7 ++++--- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c index 4eee206c241..c1c9c98b30c 100644 --- a/src/conn/conn_cache_pool.c +++ b/src/conn/conn_cache_pool.c @@ -277,7 +277,7 @@ __wt_conn_cache_pool_open(WT_SESSION_IMPL *session) * the active connection shuts down. 
*/ F_SET(cp, WT_CACHE_POOL_ACTIVE); - F_SET(cache, WT_CACHE_POOL_RUN); + FLD_SET(cache->pool_flags, WT_CACHE_POOL_RUN); WT_RET(__wt_thread_create(session, &cache->cp_tid, __wt_cache_pool_server, cache->cp_session)); @@ -340,7 +340,7 @@ __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session) __wt_spin_unlock(session, &cp->cache_pool_lock); cp_locked = false; - F_CLR(cache, WT_CACHE_POOL_RUN); + FLD_CLR(cache->pool_flags, WT_CACHE_POOL_RUN); __wt_cond_signal(session, cp->cache_pool_cond); WT_TRET(__wt_thread_join(session, cache->cp_tid)); @@ -399,7 +399,7 @@ __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session) __wt_spin_unlock(session, &cp->cache_pool_lock); /* Notify other participants if we were managing */ - if (F_ISSET(cache, WT_CACHE_POOL_MANAGER)) { + if (FLD_ISSET(cache->pool_flags, WT_CACHE_POOL_MANAGER)) { cp->pool_managed = 0; __wt_verbose(session, WT_VERB_SHARED_CACHE, "Shutting down shared cache manager connection"); @@ -449,7 +449,8 @@ __cache_pool_balance(WT_SESSION_IMPL *session, bool forward) for (i = 0; i < 2 * WT_CACHE_POOL_BUMP_THRESHOLD && F_ISSET(cp, WT_CACHE_POOL_ACTIVE) && - F_ISSET(S2C(session)->cache, WT_CACHE_POOL_RUN); i++) { + FLD_ISSET(S2C(session)->cache->pool_flags, WT_CACHE_POOL_RUN); + i++) { __cache_pool_adjust( session, highest, bump_threshold, forward, &adjusted); /* @@ -760,7 +761,7 @@ __wt_cache_pool_server(void *arg) forward = true; while (F_ISSET(cp, WT_CACHE_POOL_ACTIVE) && - F_ISSET(cache, WT_CACHE_POOL_RUN)) { + FLD_ISSET(cache->pool_flags, WT_CACHE_POOL_RUN)) { if (cp->currently_used <= cp->size) __wt_cond_wait( session, cp->cache_pool_cond, WT_MILLION, NULL); @@ -770,12 +771,12 @@ __wt_cache_pool_server(void *arg) * lock on shutdown. 
*/ if (!F_ISSET(cp, WT_CACHE_POOL_ACTIVE) && - F_ISSET(cache, WT_CACHE_POOL_RUN)) + FLD_ISSET(cache->pool_flags, WT_CACHE_POOL_RUN)) break; /* Try to become the managing thread */ if (__wt_atomic_cas8(&cp->pool_managed, 0, 1)) { - F_SET(cache, WT_CACHE_POOL_MANAGER); + FLD_SET(cache->pool_flags, WT_CACHE_POOL_MANAGER); __wt_verbose(session, WT_VERB_SHARED_CACHE, "Cache pool switched manager thread"); } @@ -784,7 +785,7 @@ __wt_cache_pool_server(void *arg) * Continue even if there was an error. Details of errors are * reported in the balance function. */ - if (F_ISSET(cache, WT_CACHE_POOL_MANAGER)) { + if (FLD_ISSET(cache->pool_flags, WT_CACHE_POOL_MANAGER)) { __cache_pool_balance(session, forward); forward = !forward; } diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 041e557ef78..48d1ae95547 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -562,7 +562,7 @@ __evict_update_work(WT_SESSION_IMPL *session) cache = conn->cache; /* Clear previous state. */ - F_CLR(cache, WT_CACHE_EVICT_MASK); + cache->flags = 0; if (!F_ISSET(conn, WT_CONN_EVICTION_RUN)) return (false); @@ -619,8 +619,7 @@ __evict_update_work(WT_SESSION_IMPL *session) F_CLR(cache, WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_CLEAN_HARD); } - WT_STAT_CONN_SET(session, cache_eviction_state, - F_MASK(cache, WT_CACHE_EVICT_MASK)); + WT_STAT_CONN_SET(session, cache_eviction_state, cache->flags); return (F_ISSET(cache, WT_CACHE_EVICT_ALL | WT_CACHE_EVICT_URGENT)); } diff --git a/src/include/cache.h b/src/include/cache.h index 8f439599eca..a3fc17b9740 100644 --- a/src/include/cache.h +++ b/src/include/cache.h @@ -179,6 +179,10 @@ struct __wt_cache { /* * Flags. 
*/ +#define WT_CACHE_POOL_MANAGER 0x001 /* The active cache pool manager */ +#define WT_CACHE_POOL_RUN 0x002 /* Cache pool thread running */ + uint32_t pool_flags; /* Cache pool flags */ + #define WT_CACHE_EVICT_CLEAN 0x001 /* Evict clean pages */ #define WT_CACHE_EVICT_CLEAN_HARD 0x002 /* Clean % blocking app threads */ #define WT_CACHE_EVICT_DIRTY 0x004 /* Evict dirty pages */ @@ -186,9 +190,6 @@ struct __wt_cache { #define WT_CACHE_EVICT_SCRUB 0x010 /* Scrub dirty pages */ #define WT_CACHE_EVICT_URGENT 0x020 /* Pages are in the urgent queue */ #define WT_CACHE_EVICT_ALL (WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_DIRTY) -#define WT_CACHE_EVICT_MASK 0x0FF -#define WT_CACHE_POOL_MANAGER 0x100 /* The active cache pool manager */ -#define WT_CACHE_POOL_RUN 0x200 /* Cache pool thread running */ uint32_t flags; }; -- cgit v1.2.1 From 96ee1d3f21d434a6c4389a82092f570d211ad608 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Fri, 21 Apr 2017 00:22:15 -0400 Subject: WT-3293 Don't explicitly mark internal symbols hidden. (#3398) It messes with external stack decoders (e.g., MongoDB's built-in heap profiling). --- dist/s_prototypes | 3 - src/include/extern.h | 1450 ++++++++++++++++++++++---------------------- src/include/extern_posix.h | 44 +- src/include/extern_win.h | 66 +- 4 files changed, 780 insertions(+), 783 deletions(-) diff --git a/dist/s_prototypes b/dist/s_prototypes index d6228866f08..9675cd5a843 100755 --- a/dist/s_prototypes +++ b/dist/s_prototypes @@ -42,9 +42,6 @@ proto() -e '# Add the warn_unused_result attribute to any external' \ -e '# functions that return an int.' \ -e '/^extern int /s/$/ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result))/' \ - -e '# Add the hidden attribute to any external functions without' \ - -e '# an explicit visibility.' 
\ - -e '/visibility/!s/$/ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")))/' \ -e 's/$/;/' \ -e p < $1 } diff --git a/src/include/extern.h b/src/include/extern.h index ed40df8280f..232ea6ff4e2 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -1,637 +1,637 @@ /* DO NOT EDIT: automatically built by dist/s_prototypes. */ -extern void __wt_async_stats_update(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_async_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_async_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_async_flush(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_async_new_op(WT_SESSION_IMPL *session, const char *uri, const char *config, const char *cfg[], WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP_IMPL **opp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_async_op_enqueue(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_async_op_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern WT_THREAD_RET __wt_async_worker(void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_addr_to_buffer(WT_BLOCK *block, uint8_t **pp, wt_off_t offset, uint32_t size, uint32_t checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_buffer_to_addr(WT_BLOCK *block, const uint8_t *p, wt_off_t *offsetp, uint32_t *sizep, uint32_t *checksump) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_addr_invalid(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool live) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_addr_string(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_buffer_to_ckpt(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *p, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_async_stats_update(WT_SESSION_IMPL *session); +extern int __wt_async_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_async_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_async_flush(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_async_new_op(WT_SESSION_IMPL *session, const char *uri, const char *config, const char *cfg[], WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP_IMPL **opp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_async_op_enqueue(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_async_op_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern WT_THREAD_RET __wt_async_worker(void *arg); +extern 
int __wt_block_addr_to_buffer(WT_BLOCK *block, uint8_t **pp, wt_off_t offset, uint32_t size, uint32_t checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_buffer_to_addr(WT_BLOCK *block, const uint8_t *p, wt_off_t *offsetp, uint32_t *sizep, uint32_t *checksump) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_addr_invalid(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool live) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_addr_string(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_buffer_to_ckpt(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *p, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_ckpt_decode(WT_SESSION *wt_session, size_t allocsize, const uint8_t *p, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_ckpt_to_buffer(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t **pp, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_ckpt_init( WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, uint8_t *root_addr, size_t *root_addr_sizep, bool checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_checkpoint_unload( WT_SESSION_IMPL *session, WT_BLOCK *block, bool checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void 
__wt_block_ckpt_destroy(WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_checkpoint(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, WT_CKPT *ckptbase, bool data_checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_checkpoint_resolve(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_compact_start(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_compact_page_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_misplaced(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *tag, wt_off_t offset, uint32_t size, bool live) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_off_remove_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_alloc( WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_free(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_off_free( WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_extlist_check( WT_SESSION_IMPL *session, WT_EXTLIST *al, WT_EXTLIST *bl) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_extlist_overlap( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *a, WT_EXTLIST *b) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_insert_ext(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_extlist_read_avail(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t ckpt_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_extlist_read(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t ckpt_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, WT_EXTLIST *additional) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int 
__wt_block_extlist_truncate( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_extlist_init(WT_SESSION_IMPL *session, WT_EXTLIST *el, const char *name, const char *extname, bool track_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, size_t *lengthp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_panic(WT_SESSION_IMPL *session, int error, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_manager_drop( WT_SESSION_IMPL *session, const char *filename, bool durable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_desc_write(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_manager_named_size( WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_bm_preload( WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_read_off_blind( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset, uint32_t size, uint32_t checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_ext_alloc(WT_SESSION_IMPL *session, WT_EXT **extp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_block_ext_free(WT_SESSION_IMPL *session, WT_EXT *ext) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_size_alloc(WT_SESSION_IMPL *session, WT_SIZE **szp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_block_size_free(WT_SESSION_IMPL *session, WT_SIZE *sz) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_ext_prealloc(WT_SESSION_IMPL *session, u_int max) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_ext_discard(WT_SESSION_IMPL *session, u_int max) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_salvage_end(WT_SESSION_IMPL *session, WT_BLOCK *block) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern bool __wt_block_offset_invalid(WT_BLOCK *block, wt_off_t offset, uint32_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_salvage_next(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr, size_t *addr_sizep, bool *eofp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_salvage_valid(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr, size_t addr_size, bool valid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_verify_start(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_verify_end(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_verify_ckpt_load( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_verify_ckpt_unload(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_checksum, bool checkpoint_io) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *checksump, bool data_checksum, bool checkpoint_io, bool caller_locked) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_block_ckpt_to_buffer(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t **pp, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_ckpt_init( WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, uint8_t *root_addr, size_t *root_addr_sizep, bool checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_checkpoint_unload( WT_SESSION_IMPL *session, WT_BLOCK *block, bool checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_block_ckpt_destroy(WT_SESSION_IMPL *session, WT_BLOCK_CKPT *ci); +extern int __wt_block_checkpoint(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, WT_CKPT *ckptbase, bool data_checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_checkpoint_resolve(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_block_compact_start(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_compact_page_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_misplaced(WT_SESSION_IMPL *session, WT_BLOCK *block, const char *tag, wt_off_t offset, uint32_t size, bool live) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_off_remove_overlap(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_alloc( WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_free(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_off_free( WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t offset, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_extlist_check( WT_SESSION_IMPL *session, WT_EXTLIST *al, WT_EXTLIST *bl) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_extlist_overlap( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_extlist_merge(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *a, WT_EXTLIST *b) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_insert_ext(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t off, wt_off_t size) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_extlist_read_avail(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t ckpt_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_extlist_read(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, wt_off_t ckpt_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_extlist_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el, WT_EXTLIST *additional) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_extlist_truncate( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_extlist_init(WT_SESSION_IMPL *session, WT_EXTLIST *el, const char *name, const char *extname, bool track_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el); +extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, size_t *lengthp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_panic(WT_SESSION_IMPL *session, int error, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_manager_drop( WT_SESSION_IMPL *session, const char *filename, bool durable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on); +extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_desc_write(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats); +extern int __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_manager_named_size( WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bm_preload( WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_read_off_blind( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset, uint32_t size, uint32_t 
checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_ext_alloc(WT_SESSION_IMPL *session, WT_EXT **extp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_block_ext_free(WT_SESSION_IMPL *session, WT_EXT *ext); +extern int __wt_block_size_alloc(WT_SESSION_IMPL *session, WT_SIZE **szp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_block_size_free(WT_SESSION_IMPL *session, WT_SIZE *sz); +extern int __wt_block_ext_prealloc(WT_SESSION_IMPL *session, u_int max) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_ext_discard(WT_SESSION_IMPL *session, u_int max) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_salvage_end(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_block_offset_invalid(WT_BLOCK *block, wt_off_t offset, uint32_t size); +extern int __wt_block_salvage_next(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr, size_t *addr_sizep, bool *eofp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_salvage_valid(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr, size_t addr_size, bool valid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_verify_start(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_verify_end(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verify_ckpt_load( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verify_ckpt_unload(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_checksum, bool checkpoint_io) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *checksump, bool data_checksum, bool checkpoint_io, bool caller_locked) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_bloom_create( WT_SESSION_IMPL *session, const char *uri, const char *config, uint64_t count, uint32_t factor, uint32_t k, WT_BLOOM **bloomp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_bloom_open(WT_SESSION_IMPL *session, const char *uri, uint32_t factor, uint32_t k, WT_CURSOR *owner, WT_BLOOM **bloomp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_bloom_insert(WT_BLOOM *bloom, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern int __wt_bloom_finalize(WT_BLOOM *bloom) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern void __wt_bloom_hash(WT_BLOOM *bloom, WT_ITEM *key, WT_BLOOM_HASH *bhash) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_bloom_hash_get(WT_BLOOM *bloom, 
WT_BLOOM_HASH *bhash) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_bloom_hash(WT_BLOOM *bloom, WT_ITEM *key, WT_BLOOM_HASH *bhash); +extern int __wt_bloom_hash_get(WT_BLOOM *bloom, WT_BLOOM_HASH *bhash) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_bloom_get(WT_BLOOM *bloom, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_bloom_inmem_get(WT_BLOOM *bloom, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_bloom_intersection(WT_BLOOM *bloom, WT_BLOOM *other) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_bloom_inmem_get(WT_BLOOM *bloom, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bloom_intersection(WT_BLOOM *bloom, WT_BLOOM *other) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_bloom_close(WT_BLOOM *bloom) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_bloom_drop(WT_BLOOM *bloom, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_compact(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_key_order_check( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_key_order_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cursor_key_order_reset(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern bool __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_remove(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_reserve(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_btcur_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_btcur_open(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_debug_set_verbose(WT_SESSION_IMPL *session, const char *v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_debug_addr_print( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_debug_addr(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_debug_offset_blind( WT_SESSION_IMPL *session, wt_off_t offset, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_debug_offset(WT_SESSION_IMPL *session, wt_off_t offset, uint32_t size, uint32_t checksum, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_debug_disk( WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_debug_tree_shape( WT_SESSION_IMPL *session, WT_PAGE *page, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_debug_tree_all( WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_debug_tree( WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_debug_page(WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_ref_out_int(WT_SESSION_IMPL *session, WT_REF *ref, bool rewrite) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_page_out(WT_SESSION_IMPL *session, 
WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_free_ref( WT_SESSION_IMPL *session, WT_REF *ref, int page_type, bool free_pages) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_free_ref_index(WT_SESSION_IMPL *session, WT_PAGE *page, WT_PAGE_INDEX *pindex, bool free_pages) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btree_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btree_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_root_ref_init(WT_REF *root_ref, WT_PAGE *root, bool is_recno) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btree_tree_open( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_btree_huffman_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool checkpoint, bool checkpoint_io, bool compressed) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_compact(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_key_order_check( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_key_order_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_cursor_key_order_reset(WT_CURSOR_BTREE *cbt); +extern void __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt); +extern int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp); +extern int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_remove(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_reserve(WT_CURSOR_BTREE *cbt) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_compare(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_btcur_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt); +extern void __wt_btcur_open(WT_CURSOR_BTREE *cbt); +extern int __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_set_verbose(WT_SESSION_IMPL *session, const char *v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_addr_print( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_addr(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_offset_blind( WT_SESSION_IMPL *session, wt_off_t offset, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_offset(WT_SESSION_IMPL *session, wt_off_t offset, uint32_t size, uint32_t checksum, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_disk( WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_tree_shape( WT_SESSION_IMPL *session, WT_PAGE *page, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_tree_all( WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_tree( WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_debug_page(WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref); +extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all); +extern int __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_ref_out_int(WT_SESSION_IMPL *session, WT_REF *ref, bool rewrite); +extern void __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref); +extern void __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep); +extern void __wt_free_ref( WT_SESSION_IMPL *session, WT_REF *ref, int page_type, bool free_pages); +extern void __wt_free_ref_index(WT_SESSION_IMPL *session, WT_PAGE *page, WT_PAGE_INDEX *pindex, bool free_pages); +extern void __wt_free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd); +extern int __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btree_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btree_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_root_ref_init(WT_REF *root_ref, WT_PAGE *root, bool is_recno); +extern int __wt_btree_tree_open( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_btree_huffman_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_btree_huffman_close(WT_SESSION_IMPL *session); +extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool checkpoint, bool checkpoint_io, bool compressed) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern const char *__wt_page_type_string(u_int type) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); -extern const char *__wt_cell_type_string(uint8_t type) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern const char *__wt_addr_string(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ovfl_cache(WT_SESSION_IMPL *session, WT_PAGE *page, void *cookie, WT_CELL_UNPACK *vpack) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, size_t memsize, uint32_t flags, WT_PAGE 
**pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, bool eviction) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_las_remove_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern const char *__wt_cell_type_string(uint8_t type); +extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf); +extern const char *__wt_addr_string(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, WT_ITEM *buf); +extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_cache(WT_SESSION_IMPL *session, WT_PAGE *page, void *cookie, WT_CELL_UNPACK *vpack) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, size_t memsize, uint32_t flags, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_random_leaf(WT_SESSION_IMPL 
*session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, bool eviction) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_las_remove_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags #ifdef HAVE_DIAGNOSTIC , const char *file, int line #endif - ) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cache_op(WT_SESSION_IMPL *session, WT_CACHE_OP op) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk, size_t size, bool empty_page_ok) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_tree_walk_skip( WT_SESSION_IMPL *session, WT_REF **refp, 
uint64_t *skipleafcntp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_row_leaf_key_copy( WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_row_leaf_key_work(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip_arg, WT_ITEM *keyb, bool instantiate) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_IKEY **ikeyp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_row_ikey(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, const WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp, size_t *sizep, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_update_obsolete_free( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_las_stats_update(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_las_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_las_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_las_set_written(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern bool __wt_las_is_written(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_las_cursor_open(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_las_sweep(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); + ); +extern int __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_split_reverse(WT_SESSION_IMPL *session, 
WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cache_op(WT_SESSION_IMPL *session, WT_CACHE_OP op) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk, size_t size, bool empty_page_ok) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_tree_walk_skip( WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_leaf_key_copy( 
WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_leaf_key_work(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip_arg, WT_ITEM *keyb, bool instantiate) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_IKEY **ikeyp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_ikey(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, const WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp, size_t *sizep, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd); +extern void __wt_update_obsolete_free( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd); +extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, 
WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_las_stats_update(WT_SESSION_IMPL *session); +extern int __wt_las_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_las_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_las_set_written(WT_SESSION_IMPL *session); +extern bool __wt_las_is_written(WT_SESSION_IMPL *session); +extern int __wt_las_cursor_open(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags); +extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_las_sweep(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern uint32_t __wt_checksum_sw(const void *chunk, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); -extern void __wt_checksum_init(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_config_initn( WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_config_init(WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_config_subinit( WT_SESSION_IMPL *session, WT_CONFIG *conf, WT_CONFIG_ITEM *item) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_config_next(WT_CONFIG *conf, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_config_get(WT_SESSION_IMPL *session, const char **cfg_arg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_config_gets(WT_SESSION_IMPL *session, const char **cfg, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_config_gets_none(WT_SESSION_IMPL *session, const char **cfg, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_config_getone(WT_SESSION_IMPL *session, const char *config, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_config_getones(WT_SESSION_IMPL *session, const char *config, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_config_getones_none(WT_SESSION_IMPL *session, const char *config, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_config_gets_def(WT_SESSION_IMPL *session, const char **cfg, const char *key, int def, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_config_subgetraw(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_config_subgets(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_conn_foc_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_configure_method(WT_SESSION_IMPL *session, 
const char *method, const char *uri, const char *config, const char *type, const char *check) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_config_check(WT_SESSION_IMPL *session, const WT_CONFIG_ENTRY *entry, const char *config, size_t config_len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_config_collapse( WT_SESSION_IMPL *session, const char **cfg, char **config_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_checksum_init(void); +extern void __wt_config_initn( WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str, size_t len); +extern void __wt_config_init(WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str); +extern void __wt_config_subinit( WT_SESSION_IMPL *session, WT_CONFIG *conf, WT_CONFIG_ITEM *item); +extern int __wt_config_next(WT_CONFIG *conf, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_get(WT_SESSION_IMPL *session, const char **cfg_arg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_gets(WT_SESSION_IMPL *session, const char **cfg, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_gets_none(WT_SESSION_IMPL *session, const char **cfg, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_getone(WT_SESSION_IMPL *session, const char *config, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_getones(WT_SESSION_IMPL *session, const char *config, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_getones_none(WT_SESSION_IMPL *session, 
const char *config, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_gets_def(WT_SESSION_IMPL *session, const char **cfg, const char *key, int def, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_subgetraw(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_subgets(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, const char *key, WT_CONFIG_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_conn_foc_discard(WT_SESSION_IMPL *session); +extern int __wt_configure_method(WT_SESSION_IMPL *session, const char *method, const char *uri, const char *config, const char *type, const char *check) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_check(WT_SESSION_IMPL *session, const WT_CONFIG_ENTRY *entry, const char *config, size_t config_len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_collapse( WT_SESSION_IMPL *session, const char **cfg, char **config_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_config_merge(WT_SESSION_IMPL *session, const char **cfg, const char *cfg_strip, const char **config_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_conn_config_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_conn_config_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern const WT_CONFIG_ENTRY *__wt_conn_config_match(const char *method) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_config_get(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_CONFIG_ARG *cfg_arg, const char *key, WT_CONFIG_ITEM *cval) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_config_get_string(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *config, const char *key, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_config_parser_open(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session, const char *config, size_t len, WT_CONFIG_PARSER **config_parserp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_config_parser_open_arg(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session, WT_CONFIG_ARG *cfg_arg, WT_CONFIG_PARSER **config_parserp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_config_upgrade(WT_SESSION_IMPL *session, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern const char *__wt_wiredtiger_error(int error) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_collator_config(WT_SESSION_IMPL *session, const char *uri, WT_CONFIG_ITEM *cname, WT_CONFIG_ITEM *metadata, WT_COLLATOR **collatorp, int *ownp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_conn_remove_collator(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_compressor_config( WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_COMPRESSOR **compressorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_conn_remove_compressor(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int 
__wt_conn_remove_data_source(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_encryptor_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_CONFIG_ITEM *keyid, WT_CONFIG_ARG *cfg_arg, WT_KEYED_ENCRYPTOR **kencryptorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_conn_remove_encryptor(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_extractor_config(WT_SESSION_IMPL *session, const char *uri, const char *config, WT_EXTRACTOR **extractorp, int *ownp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_conn_remove_extractor(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cache_config(WT_SESSION_IMPL *session, bool reconfigure, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cache_stats_update(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cache_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern 
int __wt_conn_cache_pool_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern WT_THREAD_RET __wt_cache_pool_server(void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_conn_dhandle_alloc( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_conn_dhandle_find( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_conn_btree_open( WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_conn_dhandle_close_all( WT_SESSION_IMPL *session, const char *uri, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_conn_dhandle_discard_single( WT_SESSION_IMPL *session, bool final, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_connection_destroy(WT_CONNECTION_IMPL *conn) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logmgr_reconfig(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_truncate_files( WT_SESSION_IMPL *session, WT_CURSOR *cursor, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_log_wrlsn(WT_SESSION_IMPL *session, int *yield) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logmgr_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logmgr_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_connection_close(WT_CONNECTION_IMPL *conn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_conn_stat_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_sweep_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_sweep_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_sweep_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_backup_file_remove(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curbulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool bitmap, bool skip_sort_check) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); 
-extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curds_open( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curfile_next_random(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curfile_insert_check(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curjoin_joined(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curjoin_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_INDEX *idx, WT_CURSOR *ref_cursor, uint8_t flags, uint8_t 
range, uint64_t count, uint32_t bloom_bit_count, uint32_t bloom_hash_count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, WT_CURSOR_JSON *json, bool iskey, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_conn_config_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_conn_config_discard(WT_SESSION_IMPL *session); +extern const WT_CONFIG_ENTRY *__wt_conn_config_match(const char *method); +extern int __wt_ext_config_get(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_CONFIG_ARG *cfg_arg, const char *key, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_config_get_string(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *config, const char *key, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_config_parser_open(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session, const char *config, size_t len, WT_CONFIG_PARSER **config_parserp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_config_parser_open_arg(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session, WT_CONFIG_ARG *cfg_arg, WT_CONFIG_PARSER **config_parserp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_config_upgrade(WT_SESSION_IMPL *session, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_wiredtiger_error(int error); +extern int __wt_collator_config(WT_SESSION_IMPL *session, const char *uri, WT_CONFIG_ITEM *cname, WT_CONFIG_ITEM *metadata, WT_COLLATOR **collatorp, int *ownp) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_remove_collator(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_compressor_config( WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_COMPRESSOR **compressorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_remove_compressor(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_remove_data_source(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_encryptor_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_CONFIG_ITEM *keyid, WT_CONFIG_ARG *cfg_arg, WT_KEYED_ENCRYPTOR **kencryptorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_remove_encryptor(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_extractor_config(WT_SESSION_IMPL *session, const char *uri, const char *config, WT_EXTRACTOR **extractorp, int *ownp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_remove_extractor(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cache_config(WT_SESSION_IMPL *session, bool reconfigure, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_cache_stats_update(WT_SESSION_IMPL *session); +extern int __wt_cache_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_cache_pool_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); 
+extern int __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern WT_THREAD_RET __wt_cache_pool_server(void *arg); +extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize); +extern int __wt_conn_dhandle_alloc( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_dhandle_find( WT_SESSION_IMPL *session, const char *uri, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_btree_open( WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_dhandle_close_all( WT_SESSION_IMPL *session, const char *uri, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_dhandle_discard_single( WT_SESSION_IMPL *session, bool final, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_connection_destroy(WT_CONNECTION_IMPL *conn); +extern int __wt_logmgr_reconfig(WT_SESSION_IMPL 
*session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_truncate_files( WT_SESSION_IMPL *session, WT_CURSOR *cursor, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_log_wrlsn(WT_SESSION_IMPL *session, int *yield); +extern int __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logmgr_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logmgr_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_connection_close(WT_CONNECTION_IMPL *conn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_conn_stat_init(WT_SESSION_IMPL *session); +extern int __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_sweep_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_sweep_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_sweep_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_backup_file_remove(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curbulk_init(WT_SESSION_IMPL *session, 
WT_CURSOR_BULK *cbulk, bool bitmap, bool skip_sort_check) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curds_open( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curfile_next_random(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curfile_insert_check(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curjoin_joined(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curjoin_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_INDEX *idx, WT_CURSOR *ref_cursor, uint8_t flags, uint8_t range, uint64_t count, uint32_t bloom_bit_count, uint32_t bloom_hash_count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, WT_CURSOR_JSON *json, bool iskey, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void 
__wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor); extern size_t __wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); -extern void __wt_json_column_init(WT_CURSOR *cursor, const char *uri, const char *keyformat, const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_json_column_init(WT_CURSOR *cursor, const char *uri, const char *keyformat, const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf); extern int __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype, const char **tokstart, size_t *toklen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern const char *__wt_json_tokname(int toktype) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); -extern int __wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr, const char *format, WT_CURSOR_JSON *json, bool iskey, WT_ITEM *item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr, const char *format, WT_CURSOR_JSON *json, bool iskey, WT_ITEM *item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern ssize_t __wt_json_strlen(const char *src, size_t srclen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern int __wt_json_strncpy(WT_SESSION *wt_session, char **pdst, size_t dstlen, const char *src, size_t srclen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_curlog_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], 
WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_curstat_dsrc_final(WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curstat_init(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *curjoin, const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curstat_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_noop(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_notsup(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_get_value_notsup(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cursor_set_key_notsup(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cursor_set_value_notsup(WT_CURSOR *cursor, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_compare_notsup(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_equals_notsup(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_search_near_notsup(WT_CURSOR *cursor, int *exact) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_reconfigure_notsup(WT_CURSOR *cursor, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cursor_set_notsup(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_kv_not_set(WT_CURSOR *cursor, bool key) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_get_key(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cursor_set_key(WT_CURSOR *cursor, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_get_raw_key(WT_CURSOR *cursor, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cursor_set_raw_key(WT_CURSOR *cursor, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_get_raw_value(WT_CURSOR *cursor, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cursor_set_raw_value(WT_CURSOR *cursor, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_get_keyv(WT_CURSOR *cursor, uint32_t flags, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cursor_set_keyv(WT_CURSOR *cursor, uint32_t flags, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_get_value(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_get_valuev(WT_CURSOR *cursor, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cursor_set_value(WT_CURSOR *cursor, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cursor_set_valuev(WT_CURSOR *cursor, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_close(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_equals(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_reconfigure(WT_CURSOR *cursor, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_dup_position(WT_CURSOR *to_dup, WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cursor_init(WT_CURSOR *cursor, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx, WT_CURSOR *cur, WT_CURSOR_TABLE *ctable, int (*f)(WT_CURSOR *)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curtable_get_key(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curtable_get_value(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_curtable_set_key(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_curtable_set_value(WT_CURSOR *cursor, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_table_range_truncate(WT_CURSOR_TABLE *start, WT_CURSOR_TABLE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curtable_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_evict_server_wake(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern bool __wt_evict_thread_chk(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_evict_thread_stop(WT_SESSION_IMPL *session, WT_THREAD *thread) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_evict_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_evict_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int 
__wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern bool __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_verbose_dump_cache(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, bool *recp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void 
__wt_log_written_reset(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_get_all_files(WT_SESSION_IMPL *session, char ***filesp, u_int *countp, uint32_t *maxid, bool active_only) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_extract_lognum( WT_SESSION_IMPL *session, const char *name, uint32_t *id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_allocfile( WT_SESSION_IMPL *session, uint32_t lognum, const char *dest) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_remove(WT_SESSION_IMPL *session, const char *file_prefix, uint32_t lognum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, int (*func)(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, WT_LSN *next_lsnp, void *cookie, int firstrecord), void *cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_force_write(WT_SESSION_IMPL *session, bool retry, bool 
*did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logrec_alloc(WT_SESSION_IMPL *session, size_t size, WT_ITEM **logrecp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_logrec_free(WT_SESSION_IMPL *session, WT_ITEM **logrecp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logrec_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *rectypep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *optypep, uint32_t *opsizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_col_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_col_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_col_put_print(WT_SESSION_IMPL *session, const uint8_t 
**pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_col_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_col_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_col_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_col_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t start, uint64_t stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_col_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *startp, uint64_t *stopp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_col_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_row_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_row_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_row_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_row_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_row_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_row_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_row_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *start, WT_ITEM *stop, uint32_t mode) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_row_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT 
*slot) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_slot_switch(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_slot_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_log_slot_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int64_t __wt_log_slot_release(WT_MYSLOT *myslot, int64_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_clsm_request_switch(WT_CURSOR_LSM *clsm) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_clsm_await_switch(WT_CURSOR_LSM *clsm) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_clsm_init_merge( WT_CURSOR *cursor, u_int start_chunk, uint32_t start_id, u_int nchunks) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_clsm_close(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_clsm_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_clsm_open_bulk(WT_CURSOR_LSM *clsm, const char *cfg[]) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_manager_config(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_manager_reconfig(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_manager_start(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_lsm_manager_free_work_unit( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT *entry) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_lsm_manager_clear_tree(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_manager_pop_entry( WT_SESSION_IMPL *session, uint32_t type, WT_LSM_WORK_UNIT **entryp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, uint32_t type, uint32_t flags, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_merge_update_tree(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int start_chunk, u_int nchunks, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_meta_read(WT_SESSION_IMPL *session, 
WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const char *newconfig) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curstat_lsm_init( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_tree_close_all(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_tree_bloom_name(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, uint32_t id, const char **retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_tree_chunk_name(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, uint32_t id, const char **retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_tree_set_chunk_size( WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_tree_setup_chunk( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_tree_setup_bloom( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_tree_create(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_tree_get(WT_SESSION_IMPL 
*session, const char *uri, bool exclusive, WT_LSM_TREE **treep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_lsm_tree_release(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_lsm_tree_throttle( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool decrease_only) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_tree_retire_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int start_chunk, u_int nchunks) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_tree_alter( WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_tree_drop( WT_SESSION_IMPL *session, const char *name, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_tree_rename(WT_SESSION_IMPL *session, const char *olduri, const char *newuri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_tree_truncate( WT_SESSION_IMPL *session, const char *name, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_lsm_tree_readlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_lsm_tree_readunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void 
__wt_lsm_tree_writelock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_lsm_tree_writeunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_tree_worker(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool force, WT_LSM_CHUNK **chunkp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_work_switch( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, bool *ran) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int 
__wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_apply_all(WT_SESSION_IMPL *session, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_checkpoint(WT_SESSION_IMPL *session, const char *fname, const char *checkpoint, WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_checkpoint_last_name( WT_SESSION_IMPL *session, const char *fname, const char **namep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_checkpoint_clear(WT_SESSION_IMPL *session, const char *fname) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_ckptlist_get( WT_SESSION_IMPL *session, const char *fname, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_ckptlist_set(WT_SESSION_IMPL *session, const char *fname, WT_CKPT *ckptbase, WT_LSN *ckptlsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_meta_checkpoint_free(WT_SESSION_IMPL *session, WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_metadata_insert(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern 
int __wt_ext_metadata_remove( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_metadata_search(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, char **valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_metadata_update(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_curlog_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_curstat_dsrc_final(WT_CURSOR_STAT *cst); +extern int __wt_curstat_init(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *curjoin, const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curstat_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *other, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_noop(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_notsup(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_get_value_notsup(WT_CURSOR *cursor, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_cursor_set_key_notsup(WT_CURSOR *cursor, ...); +extern void __wt_cursor_set_value_notsup(WT_CURSOR *cursor, ...); +extern int __wt_cursor_compare_notsup(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_equals_notsup(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_search_near_notsup(WT_CURSOR *cursor, int *exact) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_reconfigure_notsup(WT_CURSOR *cursor, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_cursor_set_notsup(WT_CURSOR *cursor); +extern int __wt_cursor_kv_not_set(WT_CURSOR *cursor, bool key) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_get_key(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_cursor_set_key(WT_CURSOR *cursor, ...); +extern int __wt_cursor_get_raw_key(WT_CURSOR *cursor, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_cursor_set_raw_key(WT_CURSOR *cursor, WT_ITEM *key); +extern int __wt_cursor_get_raw_value(WT_CURSOR *cursor, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_cursor_set_raw_value(WT_CURSOR *cursor, WT_ITEM *value); +extern int __wt_cursor_get_keyv(WT_CURSOR *cursor, uint32_t flags, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_cursor_set_keyv(WT_CURSOR *cursor, uint32_t flags, va_list ap); +extern int __wt_cursor_get_value(WT_CURSOR *cursor, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_get_valuev(WT_CURSOR *cursor, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_cursor_set_value(WT_CURSOR *cursor, ...); +extern void __wt_cursor_set_valuev(WT_CURSOR *cursor, va_list ap); +extern int __wt_cursor_close(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_equals(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_reconfigure(WT_CURSOR *cursor, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_dup_position(WT_CURSOR *to_dup, WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_init(WT_CURSOR *cursor, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx, WT_CURSOR *cur, WT_CURSOR_TABLE *ctable, int (*f)(WT_CURSOR *)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curtable_get_key(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curtable_get_value(WT_CURSOR *cursor, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_curtable_set_key(WT_CURSOR *cursor, ...); +extern void __wt_curtable_set_value(WT_CURSOR *cursor, ...); +extern int __wt_table_range_truncate(WT_CURSOR_TABLE *start, WT_CURSOR_TABLE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curtable_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref); +extern void __wt_evict_server_wake(WT_SESSION_IMPL *session); +extern bool __wt_evict_thread_chk(WT_SESSION_IMPL *session); +extern int __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_evict_thread_stop(WT_SESSION_IMPL *session, WT_THREAD *thread) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_evict_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_evict_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session); +extern int __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref); +extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v); +extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session); +extern int __wt_verbose_dump_cache(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session); +extern void __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn); +extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn); +extern int __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, bool *recp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_log_written_reset(WT_SESSION_IMPL *session); +extern int __wt_log_get_all_files(WT_SESSION_IMPL *session, char ***filesp, u_int *countp, uint32_t *maxid, bool active_only) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_extract_lognum( WT_SESSION_IMPL *session, const char *name, uint32_t *id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_allocfile( WT_SESSION_IMPL *session, uint32_t lognum, const char *dest) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_remove(WT_SESSION_IMPL *session, const char *file_prefix, uint32_t lognum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, int (*func)(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, WT_LSN *next_lsnp, void *cookie, int firstrecord), void *cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_force_write(WT_SESSION_IMPL *session, bool retry, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logrec_alloc(WT_SESSION_IMPL *session, size_t size, WT_ITEM **logrecp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_logrec_free(WT_SESSION_IMPL *session, WT_ITEM **logrecp); +extern int __wt_logrec_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *rectypep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *optypep, uint32_t *opsizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_logop_col_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t start, uint64_t stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *startp, uint64_t *stopp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_col_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); 
+extern int __wt_logop_row_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *start, WT_ITEM *stop, uint32_t mode) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); +extern int __wt_log_slot_switch(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_slot_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_slot_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot); +extern int64_t __wt_log_slot_release(WT_MYSLOT *myslot, int64_t size); +extern void __wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); +extern int __wt_clsm_request_switch(WT_CURSOR_LSM *clsm) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_clsm_await_switch(WT_CURSOR_LSM *clsm) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_clsm_init_merge( WT_CURSOR *cursor, u_int start_chunk, uint32_t start_id, u_int nchunks) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_clsm_close(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_clsm_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_clsm_open_bulk(WT_CURSOR_LSM *clsm, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_manager_config(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_manager_reconfig(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_manager_start(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_lsm_manager_free_work_unit( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT *entry); +extern int __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_lsm_manager_clear_tree(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree); +extern int __wt_lsm_manager_pop_entry( WT_SESSION_IMPL *session, uint32_t type, WT_LSM_WORK_UNIT **entryp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, uint32_t type, uint32_t flags, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_merge_update_tree(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int start_chunk, u_int nchunks, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_meta_write(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, const 
char *newconfig) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curstat_lsm_init( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_close_all(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_bloom_name(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, uint32_t id, const char **retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_chunk_name(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, uint32_t id, const char **retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_set_chunk_size( WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_setup_chunk( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_setup_bloom( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_create(WT_SESSION_IMPL *session, const char *uri, bool exclusive, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_get(WT_SESSION_IMPL *session, const char *uri, bool exclusive, WT_LSM_TREE **treep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_lsm_tree_release(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree); +extern void __wt_lsm_tree_throttle( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool decrease_only); +extern int __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_retire_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int start_chunk, u_int nchunks) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_alter( WT_SESSION_IMPL *session, 
const char *uri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_drop( WT_SESSION_IMPL *session, const char *name, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_rename(WT_SESSION_IMPL *session, const char *olduri, const char *newuri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_truncate( WT_SESSION_IMPL *session, const char *name, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_lsm_tree_readlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree); +extern void __wt_lsm_tree_readunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree); +extern void __wt_lsm_tree_writelock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree); +extern void __wt_lsm_tree_writeunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree); +extern int __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_tree_worker(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool force, WT_LSM_CHUNK **chunkp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_work_switch( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, bool *ran) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE 
*lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_lsm_worker_stop(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_apply_all(WT_SESSION_IMPL *session, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_checkpoint(WT_SESSION_IMPL *session, const char *fname, const char *checkpoint, WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_checkpoint_last_name( WT_SESSION_IMPL *session, const char *fname, const char **namep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_checkpoint_clear(WT_SESSION_IMPL *session, const char *fname) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_ckptlist_get( WT_SESSION_IMPL *session, const char *fname, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_ckptlist_set(WT_SESSION_IMPL *session, const char *fname, WT_CKPT *ckptbase, WT_LSN *ckptlsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep); +extern void __wt_meta_checkpoint_free(WT_SESSION_IMPL *session, WT_CKPT *ckpt); +extern int __wt_ext_metadata_insert(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_metadata_remove( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_metadata_search(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, char **valuep) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_metadata_update(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_metadata_get_ckptlist( WT_SESSION *session, const char *name, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); -extern int __wt_metadata_cursor_open( WT_SESSION_IMPL *session, const char *config, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_metadata_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_metadata_cursor_release(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_metadata_insert( WT_SESSION_IMPL *session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_metadata_update( WT_SESSION_IMPL *session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_metadata_remove(WT_SESSION_IMPL *session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_metadata_search(WT_SESSION_IMPL *session, const char *key, char **valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_meta_track_discard(WT_SESSION_IMPL *session) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_track_on(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_meta_track_sub_on(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_track_sub_off(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_track_checkpoint(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_track_insert(WT_SESSION_IMPL *session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_track_update(WT_SESSION_IMPL *session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_track_fileop( WT_SESSION_IMPL *session, const char *olduri, const char *newuri) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_track_drop( WT_SESSION_IMPL *session, const char *filename) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_track_handle_lock(WT_SESSION_IMPL *session, bool created) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_track_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_turtle_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name, bool durable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_metadata_cursor_open( WT_SESSION_IMPL *session, const char *config, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_cursor_release(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_insert( WT_SESSION_IMPL *session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_update( WT_SESSION_IMPL *session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_remove(WT_SESSION_IMPL *session, const 
char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_metadata_search(WT_SESSION_IMPL *session, const char *key, char **valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_meta_track_discard(WT_SESSION_IMPL *session); +extern int __wt_meta_track_on(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_meta_track_sub_on(WT_SESSION_IMPL *session); +extern int __wt_meta_track_sub_off(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_checkpoint(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_insert(WT_SESSION_IMPL *session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_update(WT_SESSION_IMPL *session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_fileop( WT_SESSION_IMPL *session, const char *olduri, const char *newuri) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_drop( WT_SESSION_IMPL *session, const char *filename) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_handle_lock(WT_SESSION_IMPL *session, bool created) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_turtle_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_turtle_update(WT_SESSION_IMPL *session, const 
char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name, bool durable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int 
__wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); -extern int __wt_errno(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_map_windows_error( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint32_t windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern bool __wt_handle_is_open(WT_SESSION_IMPL *session, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_close(WT_SESSION_IMPL 
*session, WT_FH **fhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_close_connection_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_os_inmemory(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, uint32_t open_flags, uint32_t flags, WT_FSTREAM **fstrp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_os_stdio(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_errno(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen); +extern int __wt_ext_map_windows_error( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint32_t windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_handle_is_open(WT_SESSION_IMPL *session, const char *name); +extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_close_connection_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_os_inmemory(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, uint32_t open_flags, uint32_t flags, WT_FSTREAM **fstrp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_os_stdio(WT_SESSION_IMPL *session); extern int __wt_getopt( const char *progname, int nargc, char 
*const *nargv, const char *ostr) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); -extern int __wt_ext_struct_pack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *buffer, size_t size, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_struct_size(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t *sizep, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_struct_unpack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const void *buffer, size_t size, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_struct_check(WT_SESSION_IMPL *session, const char *fmt, size_t len, bool *fixedp, uint32_t *fixed_lenp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_struct_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_struct_size(WT_SESSION_IMPL *session, size_t *sizep, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_struct_pack(WT_SESSION_IMPL *session, void *buffer, size_t size, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_struct_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_struct_repack(WT_SESSION_IMPL *session, const char *infmt, const char *outfmt, const WT_ITEM *inbuf, WT_ITEM *outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_pack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *format, void *buffer, size_t size, WT_PACK_STREAM **psp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_unpack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *format, const void *buffer, size_t size, WT_PACK_STREAM **psp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_pack_close(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, size_t *usedp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_pack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_pack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t i) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_pack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char *s) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_pack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t u) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_unpack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_unpack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t *ip) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_unpack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char **sp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_unpack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t *up) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ovfl_discard_add(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_ovfl_discard_free(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ovfl_reuse_search(WT_SESSION_IMPL *session, WT_PAGE *page, uint8_t **addrp, size_t *addr_sizep, const void *value, size_t value_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ovfl_reuse_add(WT_SESSION_IMPL *session, WT_PAGE *page, const uint8_t *addr, size_t addr_size, const void *value, size_t value_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_ovfl_reuse_free(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ovfl_txnc_search( WT_PAGE *page, const uint8_t *addr, size_t addr_size, WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ovfl_txnc_add(WT_SESSION_IMPL *session, WT_PAGE *page, const uint8_t *addr, size_t addr_size, const void *value, size_t value_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_ovfl_txnc_free(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ovfl_track_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ovfl_track_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, uint32_t flags, bool *lookaside_retryp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern uint32_t __wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_bulk_insert_fix( WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_bulk_insert_var( WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_alter(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_direct_io_size_check(WT_SESSION_IMPL *session, const char **cfg, const char *config_name, uint32_t *allocsizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_colgroup_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, const char *config, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_index_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, const char *config, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_create( WT_SESSION_IMPL *session, const char *uri, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_get_table(WT_SESSION_IMPL *session, const char *name, size_t namelen, bool ok_incomplete, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_schema_destroy_colgroup(WT_SESSION_IMPL *session, WT_COLGROUP **colgroupp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_destroy_index(WT_SESSION_IMPL *session, WT_INDEX **idxp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_destroy_table(WT_SESSION_IMPL *session, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_remove_table(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_close_tables(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_colgroup_name(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, size_t len, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_open_index(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, size_t len, WT_INDEX **indexp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_open_indices(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_get_colgroup(WT_SESSION_IMPL *session, const char *uri, bool quiet, WT_TABLE **tablep, WT_COLGROUP **colgroupp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_get_index(WT_SESSION_IMPL *session, const char *uri, bool quiet, WT_TABLE **tablep, WT_INDEX **indexp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_open_table(WT_SESSION_IMPL *session, const char *name, size_t namelen, bool ok_incomplete, WT_TABLE 
**tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_colcheck(WT_SESSION_IMPL *session, const char *key_format, const char *value_format, WT_CONFIG_ITEM *colconf, u_int *kcolsp, u_int *vcolsp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_table_check(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_struct_plan(WT_SESSION_IMPL *session, WT_TABLE *table, const char *columns, size_t len, bool value_only, WT_ITEM *plan) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_struct_reformat(WT_SESSION_IMPL *session, WT_TABLE *table, const char *columns, size_t len, const char *extra_cols, bool value_only, WT_ITEM *format) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_struct_truncate(WT_SESSION_IMPL *session, const char *input_fmt, u_int ncols, WT_ITEM *format) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_project_in(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_project_out(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_project_slice(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, bool key_only, const char *vformat, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int 
__wt_schema_project_merge(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, const char *vformat, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_rename(WT_SESSION_IMPL *session, const char *uri, const char *newuri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curstat_colgroup_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curstat_index_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_curstat_table_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_truncate( WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_range_truncate(WT_CURSOR *start, WT_CURSOR *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_range_truncate( WT_SESSION_IMPL *session, WT_CURSOR *start, WT_CURSOR *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_backup_check(WT_SESSION_IMPL *session, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern WT_DATA_SOURCE *__wt_schema_get_source(WT_SESSION_IMPL *session, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); 
-extern int __wt_str_name_check(WT_SESSION_IMPL *session, const char *str) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_name_check(WT_SESSION_IMPL *session, const char *str, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_session_notsup(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_session_copy_values(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_session_release_resources(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_session_create( WT_SESSION_IMPL *session, const char *uri, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_session_range_truncate(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *start, WT_CURSOR *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern const char *__wt_session_strerror(WT_SESSION *wt_session, int error) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, bool open_metadata, WT_SESSION_IMPL **sessionp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_session_compact_check_timeout(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_session_compact( WT_SESSION *wt_session, const char *uri, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_session_compact_readonly( WT_SESSION *wt_session, const char *uri, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_session_lock_dhandle( WT_SESSION_IMPL *session, uint32_t flags, bool *is_deadp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_session_release_btree(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_session_close_cache(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int 
__wt_session_get_btree(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cond_auto_alloc(WT_SESSION_IMPL *session, const char *name, uint64_t min, uint64_t max, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cond_auto_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cond_auto_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_decrypt(WT_SESSION_IMPL *session, WT_ENCRYPTOR *encryptor, size_t skip, WT_ITEM *in, WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_encrypt(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t skip, WT_ITEM *in, WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_encrypt_size(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_event_handler_set(WT_SESSION_IMPL *session, WT_EVENT_HANDLER *handler) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int 
__wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, const char *file_name, int line_number, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_ext_struct_pack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *buffer, size_t size, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_struct_size(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t *sizep, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_struct_unpack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const void *buffer, size_t size, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_struct_check(WT_SESSION_IMPL *session, const char *fmt, size_t len, bool *fixedp, uint32_t *fixed_lenp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_struct_confchk(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_struct_size(WT_SESSION_IMPL *session, size_t *sizep, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_struct_pack(WT_SESSION_IMPL *session, void *buffer, size_t size, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_struct_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_struct_repack(WT_SESSION_IMPL *session, const char *infmt, const char *outfmt, const WT_ITEM *inbuf, WT_ITEM *outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_pack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *format, void *buffer, size_t size, WT_PACK_STREAM **psp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_unpack_start(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *format, const void *buffer, size_t size, WT_PACK_STREAM **psp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_pack_close(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, size_t *usedp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_pack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_pack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t i) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_pack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char *s) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_pack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t u) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_unpack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT_ITEM *item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_unpack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t *ip) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_unpack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char **sp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_unpack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t *up) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_discard_add(WT_SESSION_IMPL *session, WT_PAGE *page, 
WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_ovfl_discard_free(WT_SESSION_IMPL *session, WT_PAGE *page); +extern int __wt_ovfl_reuse_search(WT_SESSION_IMPL *session, WT_PAGE *page, uint8_t **addrp, size_t *addr_sizep, const void *value, size_t value_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_reuse_add(WT_SESSION_IMPL *session, WT_PAGE *page, const uint8_t *addr, size_t addr_size, const void *value, size_t value_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_ovfl_reuse_free(WT_SESSION_IMPL *session, WT_PAGE *page); +extern int __wt_ovfl_txnc_search( WT_PAGE *page, const uint8_t *addr, size_t addr_size, WT_ITEM *store) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_txnc_add(WT_SESSION_IMPL *session, WT_PAGE *page, const uint8_t *addr, size_t addr_size, const void *value, size_t value_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_ovfl_txnc_free(WT_SESSION_IMPL *session, WT_PAGE *page); +extern int __wt_ovfl_track_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ovfl_track_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, uint32_t flags, bool *lookaside_retryp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern uint32_t __wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize); +extern int __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bulk_insert_fix( 
WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bulk_insert_var( WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_alter(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_direct_io_size_check(WT_SESSION_IMPL *session, const char **cfg, const char *config_name, uint32_t *allocsizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_colgroup_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, const char *config, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_index_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, const char *config, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_create( WT_SESSION_IMPL *session, const char *uri, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_get_table(WT_SESSION_IMPL *session, const char *name, size_t namelen, bool ok_incomplete, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_schema_release_table(WT_SESSION_IMPL *session, WT_TABLE *table); +extern void __wt_schema_destroy_colgroup(WT_SESSION_IMPL *session, WT_COLGROUP **colgroupp); +extern int __wt_schema_destroy_index(WT_SESSION_IMPL *session, WT_INDEX **idxp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_destroy_table(WT_SESSION_IMPL *session, WT_TABLE **tablep) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_remove_table(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_close_tables(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_colgroup_name(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, size_t len, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_open_index(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, size_t len, WT_INDEX **indexp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_open_indices(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_get_colgroup(WT_SESSION_IMPL *session, const char *uri, bool quiet, WT_TABLE **tablep, WT_COLGROUP **colgroupp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_get_index(WT_SESSION_IMPL *session, const char *uri, bool quiet, WT_TABLE **tablep, WT_INDEX **indexp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_open_table(WT_SESSION_IMPL *session, const char *name, size_t namelen, bool ok_incomplete, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_colcheck(WT_SESSION_IMPL *session, const char *key_format, const char *value_format, WT_CONFIG_ITEM *colconf, u_int *kcolsp, u_int *vcolsp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_table_check(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_struct_plan(WT_SESSION_IMPL *session, WT_TABLE *table, const char *columns, size_t len, bool value_only, WT_ITEM *plan) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_struct_reformat(WT_SESSION_IMPL *session, WT_TABLE *table, const char *columns, size_t len, const char *extra_cols, bool value_only, WT_ITEM *format) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_struct_truncate(WT_SESSION_IMPL *session, const char *input_fmt, u_int ncols, WT_ITEM *format) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_project_in(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_project_out(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_project_slice(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, bool key_only, const char *vformat, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_project_merge(WT_SESSION_IMPL *session, WT_CURSOR **cp, const char *proj_arg, const char *vformat, WT_ITEM *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_rename(WT_SESSION_IMPL *session, const char *uri, const char *newuri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curstat_colgroup_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curstat_index_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curstat_table_init(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR_STAT *cst) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_truncate( WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_range_truncate(WT_CURSOR *start, WT_CURSOR *stop) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_range_truncate( WT_SESSION_IMPL *session, WT_CURSOR *start, WT_CURSOR *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_backup_check(WT_SESSION_IMPL *session, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern WT_DATA_SOURCE *__wt_schema_get_source(WT_SESSION_IMPL *session, const char *name); +extern int __wt_str_name_check(WT_SESSION_IMPL *session, const char *str) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_name_check(WT_SESSION_IMPL *session, const char *str, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_notsup(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_copy_values(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_release_resources(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_create( WT_SESSION_IMPL *session, const char *uri, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_range_truncate(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *start, WT_CURSOR *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_session_strerror(WT_SESSION *wt_session, int 
error); +extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, bool open_metadata, WT_SESSION_IMPL **sessionp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_compact_check_timeout(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_compact( WT_SESSION *wt_session, const char *uri, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_compact_readonly( WT_SESSION *wt_session, const char *uri, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_lock_dhandle( WT_SESSION_IMPL *session, uint32_t flags, bool *is_deadp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_release_btree(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_session_close_cache(WT_SESSION_IMPL *session); +extern int __wt_session_get_btree(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint, const char *cfg[], uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cond_auto_alloc(WT_SESSION_IMPL *session, const char *name, uint64_t min, uint64_t max, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void 
__wt_cond_auto_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled); +extern void __wt_cond_auto_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, bool progress, bool (*run_func)(WT_SESSION_IMPL *)); +extern int __wt_decrypt(WT_SESSION_IMPL *session, WT_ENCRYPTOR *encryptor, size_t skip, WT_ITEM *in, WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_encrypt(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t skip, WT_ITEM *in, WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_encrypt_size(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep); +extern void __wt_event_handler_set(WT_SESSION_IMPL *session, WT_EVENT_HANDLER *handler); +extern int __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, const char *file_name, int line_number, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_err(WT_SESSION_IMPL *session, int error, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); -extern void __wt_errx(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3))) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_err_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_msg_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern const char *__wt_ext_strerror(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, int error) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_progress(WT_SESSION_IMPL *session, const char *s, uint64_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_errx(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3))); +extern int __wt_ext_err_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_msg_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_ext_strerror(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, int error); +extern int __wt_progress(WT_SESSION_IMPL *session, const char *s, uint64_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_assert(WT_SESSION_IMPL *session, int error, const char *file_name, int line_number, const char *fmt, ...) 
@@ -643,140 +643,140 @@ __wt_assert(WT_SESSION_IMPL *session, WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern int __wt_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_illegal_value(WT_SESSION_IMPL *session, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_unexpected_object_type( WT_SESSION_IMPL *session, const char *uri, const char *expect) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_gen_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern uint64_t __wt_gen(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern uint64_t __wt_gen_next(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern uint64_t __wt_gen_next_drain(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_gen_drain(WT_SESSION_IMPL *session, int which, uint64_t generation) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern uint64_t __wt_gen_oldest(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern uint64_t __wt_session_gen(WT_SESSION_IMPL *session, int which) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_session_gen_enter(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_session_gen_leave(WT_SESSION_IMPL *session, int which) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stash_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_stash_add(WT_SESSION_IMPL *session, int which, uint64_t generation, void *p, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stash_discard_all(WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_library_init(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_breakpoint(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_attach(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern uint64_t __wt_hash_city64(const void *s, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern uint64_t __wt_hash_fnv64(const void *string, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_unexpected_object_type( WT_SESSION_IMPL *session, const char *uri, const char *expect) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_gen_init(WT_SESSION_IMPL *session); +extern uint64_t __wt_gen(WT_SESSION_IMPL *session, int which); +extern uint64_t __wt_gen_next(WT_SESSION_IMPL 
*session, int which); +extern uint64_t __wt_gen_next_drain(WT_SESSION_IMPL *session, int which); +extern void __wt_gen_drain(WT_SESSION_IMPL *session, int which, uint64_t generation); +extern uint64_t __wt_gen_oldest(WT_SESSION_IMPL *session, int which); +extern uint64_t __wt_session_gen(WT_SESSION_IMPL *session, int which); +extern void __wt_session_gen_enter(WT_SESSION_IMPL *session, int which); +extern void __wt_session_gen_leave(WT_SESSION_IMPL *session, int which); +extern void __wt_stash_discard(WT_SESSION_IMPL *session); +extern int __wt_stash_add(WT_SESSION_IMPL *session, int which, uint64_t generation, void *p, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_stash_discard_all(WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session); +extern int __wt_library_init(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_breakpoint(void); +extern void __wt_attach(WT_SESSION_IMPL *session); +extern uint64_t __wt_hash_city64(const void *s, size_t len); +extern uint64_t __wt_hash_fnv64(const void *string, size_t len); extern int __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp #ifdef HAVE_DIAGNOSTIC , const char *file, int line #endif - ) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_hazard_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern WT_HAZARD *__wt_hazard_check(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern u_int __wt_hazard_count(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_fill_hex(const uint8_t *src, size_t src_max, uint8_t *dest, size_t dest_max, size_t *lenp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_raw_to_hex( 
WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_raw_to_esc_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_hex2byte(const u_char *from, u_char *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_nhex_to_raw( WT_SESSION_IMPL *session, const char *from, size_t size, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_esc_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_huffman_open(WT_SESSION_IMPL *session, void *symbol_frequency_array, u_int symcnt, u_int numbytes, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_print_huffman_code(void *huffman_arg, uint16_t symbol) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_readlock_spin(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern bool __wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern uint32_t __wt_nlpo2_round(uint32_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern uint32_t __wt_nlpo2(uint32_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern uint32_t __wt_log2_int(uint32_t n) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern bool __wt_ispo2(uint32_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); + ); +extern int __wt_hazard_clear(WT_SESSION_IMPL *session, WT_REF *ref) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_hazard_close(WT_SESSION_IMPL *session); +extern WT_HAZARD *__wt_hazard_check(WT_SESSION_IMPL *session, WT_REF *ref); +extern u_int __wt_hazard_count(WT_SESSION_IMPL *session, WT_REF *ref); +extern void __wt_fill_hex(const uint8_t *src, size_t src_max, uint8_t *dest, size_t dest_max, size_t *lenp); +extern int __wt_raw_to_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_raw_to_esc_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_hex2byte(const u_char *from, u_char *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_nhex_to_raw( WT_SESSION_IMPL *session, const char *from, size_t size, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_esc_hex_to_raw(WT_SESSION_IMPL *session, const char *from, WT_ITEM *to) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_huffman_open(WT_SESSION_IMPL *session, void *symbol_frequency_array, u_int symcnt, u_int numbytes, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg); +extern void __wt_print_huffman_code(void *huffman_arg, uint16_t symbol); +extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l); +extern void 
__wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK *l); +extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_readlock_spin(WT_SESSION_IMPL *session, WT_RWLOCK *l); +extern void __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l); +extern void __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l); +extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l); +extern void __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l); +extern bool __wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *l); +extern uint32_t __wt_nlpo2_round(uint32_t v); +extern uint32_t __wt_nlpo2(uint32_t v); +extern uint32_t __wt_log2_int(uint32_t n); +extern bool __wt_ispo2(uint32_t v); +extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2); extern void __wt_random_init(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_random_init_seed( WT_SESSION_IMPL *session, WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern uint32_t __wt_random(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern uint64_t __wt_random64(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); -extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern const char *__wt_buf_set_printable( WT_SESSION_IMPL *session, const void *p, size_t size, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern const char *__wt_buf_set_size( WT_SESSION_IMPL *session, uint64_t size, bool exact, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_buf_set_printable( WT_SESSION_IMPL *session, const void *p, size_t size, WT_ITEM *buf); +extern const char *__wt_buf_set_size( WT_SESSION_IMPL *session, uint64_t size, bool exact, WT_ITEM *buf); extern int __wt_scr_alloc_func(WT_SESSION_IMPL *session, size_t size, WT_ITEM **scratchp #ifdef HAVE_DIAGNOSTIC , const char *file, int line #endif - ) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_scr_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void *__wt_ext_scr_alloc( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_ext_scr_free(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *p) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_stat_dsrc_desc(WT_CURSOR_STAT *cst, int slot, const char **p) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); 
-extern void __wt_stat_dsrc_init_single(WT_DSRC_STATS *stats) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_stat_dsrc_init( WT_SESSION_IMPL *session, WT_DATA_HANDLE *handle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stat_dsrc_discard( WT_SESSION_IMPL *session, WT_DATA_HANDLE *handle) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stat_dsrc_clear_all(WT_DSRC_STATS **stats) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stat_dsrc_aggregate_single( WT_DSRC_STATS *from, WT_DSRC_STATS *to) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stat_dsrc_aggregate( WT_DSRC_STATS **from, WT_DSRC_STATS *to) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_stat_connection_desc(WT_CURSOR_STAT *cst, int slot, const char **p) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stat_connection_init_single(WT_CONNECTION_STATS *stats) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_stat_connection_init( WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *handle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stat_connection_discard( WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *handle) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stat_connection_clear_all(WT_CONNECTION_STATS **stats) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stat_connection_aggregate( WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *to) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern 
int __wt_stat_join_desc(WT_CURSOR_STAT *cst, int slot, const char **p) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stat_join_init_single(WT_JOIN_STATS *stats) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stat_join_clear_single(WT_JOIN_STATS *stats) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stat_join_clear_all(WT_JOIN_STATS **stats) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stat_join_aggregate( WT_JOIN_STATS **from, WT_JOIN_STATS *to) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_thread_group_resize( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, uint32_t new_min, uint32_t new_max, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_thread_group_create( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, const char *name, uint32_t min, uint32_t max, uint32_t flags, bool (*chk_func)(WT_SESSION_IMPL *session), int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context), int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_thread_group_start_one( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool is_locked) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_thread_group_stop_one(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_txn_get_snapshot(WT_SESSION_IMPL *session) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_txn_release(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_txn_stats_update(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_txn_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_txn_global_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_verbose_dump_txn(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[], bool waiting) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern uint64_t __wt_ext_transaction_id(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_transaction_isolation_level( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_transaction_notify( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_TXN_NOTIFY *notify) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern uint64_t __wt_ext_transaction_oldest(WT_EXTENSION_API *wt_api) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_ext_transaction_visible( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint64_t transaction_id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_txn_op_free(WT_SESSION_IMPL *session, WT_TXN_OP *op) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_log_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) 
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_checkpoint_logread(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *ckpt_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_checkpoint_log( WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_truncate_log( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_txn_truncate_end(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); + ); +extern void __wt_scr_discard(WT_SESSION_IMPL *session); +extern void *__wt_ext_scr_alloc( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t size); +extern void __wt_ext_scr_free(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *p); +extern int __wt_stat_dsrc_desc(WT_CURSOR_STAT *cst, int slot, const char **p) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_stat_dsrc_init_single(WT_DSRC_STATS *stats); +extern int __wt_stat_dsrc_init( WT_SESSION_IMPL *session, WT_DATA_HANDLE *handle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_stat_dsrc_discard( WT_SESSION_IMPL *session, WT_DATA_HANDLE *handle); +extern void __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats); +extern void __wt_stat_dsrc_clear_all(WT_DSRC_STATS **stats); +extern void __wt_stat_dsrc_aggregate_single( WT_DSRC_STATS *from, WT_DSRC_STATS *to); +extern void __wt_stat_dsrc_aggregate( WT_DSRC_STATS **from, WT_DSRC_STATS *to); +extern int __wt_stat_connection_desc(WT_CURSOR_STAT *cst, int slot, const char **p) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_stat_connection_init_single(WT_CONNECTION_STATS *stats); +extern int 
__wt_stat_connection_init( WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *handle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_stat_connection_discard( WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *handle); +extern void __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats); +extern void __wt_stat_connection_clear_all(WT_CONNECTION_STATS **stats); +extern void __wt_stat_connection_aggregate( WT_CONNECTION_STATS **from, WT_CONNECTION_STATS *to); +extern int __wt_stat_join_desc(WT_CURSOR_STAT *cst, int slot, const char **p) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_stat_join_init_single(WT_JOIN_STATS *stats); +extern void __wt_stat_join_clear_single(WT_JOIN_STATS *stats); +extern void __wt_stat_join_clear_all(WT_JOIN_STATS **stats); +extern void __wt_stat_join_aggregate( WT_JOIN_STATS **from, WT_JOIN_STATS *to); +extern int __wt_thread_group_resize( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, uint32_t new_min, uint32_t new_max, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_group_create( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, const char *name, uint32_t min, uint32_t max, uint32_t flags, bool (*chk_func)(WT_SESSION_IMPL *session), int (*run_func)(WT_SESSION_IMPL *session, WT_THREAD *context), int (*stop_func)(WT_SESSION_IMPL *session, WT_THREAD *context)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_thread_group_start_one( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool is_locked); +extern void __wt_thread_group_stop_one(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group); +extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session); +extern void __wt_txn_get_snapshot(WT_SESSION_IMPL *session); +extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_txn_release(WT_SESSION_IMPL *session); +extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_txn_stats_update(WT_SESSION_IMPL *session); +extern void __wt_txn_destroy(WT_SESSION_IMPL *session); +extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_txn_global_destroy(WT_SESSION_IMPL *session); +extern int __wt_verbose_dump_txn(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[], bool waiting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern uint64_t __wt_ext_transaction_id(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session); +extern int __wt_ext_transaction_isolation_level( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_ext_transaction_notify( WT_EXTENSION_API *wt_api, WT_SESSION 
*wt_session, WT_TXN_NOTIFY *notify) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern uint64_t __wt_ext_transaction_oldest(WT_EXTENSION_API *wt_api); +extern int __wt_ext_transaction_visible( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint64_t transaction_id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_txn_op_free(WT_SESSION_IMPL *session, WT_TXN_OP *op); +extern int __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_log_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_checkpoint_logread(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *ckpt_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_checkpoint_log( WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_truncate_log( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_txn_truncate_end(WT_SESSION_IMPL *session); extern int __wt_txn_printlog(WT_SESSION *wt_session, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_named_snapshot_drop(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_named_snapshot_config(WT_SESSION_IMPL 
*session, const char *cfg[], bool *has_create, bool *has_drops) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_txn_recover(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_named_snapshot_drop(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, const char *cfg[], bool *has_create, bool *has_drops) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session); +extern int __wt_txn_recover(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/include/extern_posix.h b/src/include/extern_posix.h index 57d94e392d1..3afffef687b 100644 --- a/src/include/extern_posix.h +++ b/src/include/extern_posix.h @@ -1,32 +1,32 @@ /* DO NOT EDIT: automatically built by dist/s_prototypes. 
*/ -extern int __wt_posix_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_posix_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_posix_file_extend( WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_os_posix(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_posix_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_posix_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_dlsym(WT_SESSION_IMPL *session, 
WT_DLH *dlh, const char *name, bool fail, void *sym_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_posix_file_extend( WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_os_posix(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_posix_map_preload(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_posix_map_discard(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_region, size_t len, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void 
__wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern bool __wt_absolute_path(const char *path) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern const char *__wt_path_separator(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern bool __wt_has_priv(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_posix_map_preload(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_posix_map_discard(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_region, size_t len, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled); +extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); +extern int __wt_cond_destroy(WT_SESSION_IMPL 
*session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_absolute_path(const char *path); +extern const char *__wt_path_separator(void); +extern bool __wt_has_priv(void); extern void __wt_stream_set_line_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_stream_set_no_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_yield(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); diff --git 
a/src/include/extern_win.h b/src/include/extern_win.h index 43127a0c79f..4e232a2df80 100644 --- a/src/include/extern_win.h +++ b/src/include/extern_win.h @@ -1,35 +1,35 @@ /* DO NOT EDIT: automatically built by dist/s_prototypes. */ -extern int __wt_win_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_win_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_win_fs_size(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_os_win(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_win_map(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_region, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern bool __wt_absolute_path(const char *path) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern const char *__wt_path_separator(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern bool __wt_has_priv(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stream_set_line_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_stream_set_no_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_vsnprintf_len_incr( char *buf, size_t size, 
size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_to_utf16_string( WT_SESSION_IMPL *session, const char*utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_to_utf8_string( WT_SESSION_IMPL *session, const wchar_t*wide, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern DWORD __wt_getlasterror(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_map_windows_error(DWORD windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern const char *__wt_formatmessage(WT_SESSION_IMPL *session, DWORD windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_yield(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_win_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_win_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, 
char **dirlist, uint32_t count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_win_fs_size(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_os_win(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_win_map(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_region, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled); +extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); +extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern bool __wt_absolute_path(const char *path); 
+extern const char *__wt_path_separator(void); +extern bool __wt_has_priv(void); +extern void __wt_stream_set_line_buffer(FILE *fp); +extern void __wt_stream_set_no_buffer(FILE *fp); +extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); +extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp); +extern int __wt_to_utf16_string( WT_SESSION_IMPL *session, const char*utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_to_utf8_string( WT_SESSION_IMPL *session, const wchar_t*wide, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern DWORD __wt_getlasterror(void); +extern int __wt_map_windows_error(DWORD windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern const char *__wt_formatmessage(WT_SESSION_IMPL *session, DWORD windows_error); +extern void __wt_yield(void); -- cgit v1.2.1 From 2ff2328a0a2a615884206633b287850329a73cc1 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 21 Apr 2017 11:29:12 -0400 Subject: WT-3219 Make the clang-analyzer job fail when lint is introduced (#3400) Quiet the four remaining clang-analyzer complaints. 
--- src/btree/row_key.c | 2 ++ src/os_common/os_alloc.c | 2 ++ src/os_posix/os_dir.c | 6 ++++++ src/session/session_api.c | 15 +++++++++------ 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/btree/row_key.c b/src/btree/row_key.c index a455a6acace..a016568898f 100644 --- a/src/btree/row_key.c +++ b/src/btree/row_key.c @@ -471,6 +471,8 @@ __wt_row_ikey_alloc(WT_SESSION_IMPL *session, { WT_IKEY *ikey; + WT_ASSERT(session, key != NULL); /* quiet clang scan-build */ + /* * Allocate memory for the WT_IKEY structure and the key, then copy * the key into place. diff --git a/src/os_common/os_alloc.c b/src/os_common/os_alloc.c index 2f31316d826..388c9c8c18b 100644 --- a/src/os_common/os_alloc.c +++ b/src/os_common/os_alloc.c @@ -266,6 +266,8 @@ __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp) WT_RET(__wt_malloc(session, len + 1, &p)); + WT_ASSERT(session, p != NULL); /* quiet clang scan-build */ + /* * Don't change this to strncpy, we rely on this function to duplicate * "strings" that contain nul bytes. diff --git a/src/os_posix/os_dir.c b/src/os_posix/os_dir.c index ca11ce918ad..8f77aba5f96 100644 --- a/src/os_posix/os_dir.c +++ b/src/os_posix/os_dir.c @@ -37,7 +37,13 @@ __wt_posix_directory_list(WT_FILE_SYSTEM *file_system, dirallocsz = 0; entries = NULL; + /* + * If opendir fails, we should have a NULL pointer with an error value, + * but various static analysis programs remain unconvinced, check both. + */ WT_SYSCALL_RETRY(((dirp = opendir(directory)) == NULL ? 
-1 : 0), ret); + if (dirp == NULL && ret == 0) + ret = EINVAL; if (ret != 0) WT_RET_MSG(session, ret, "%s: directory-list: opendir", directory); diff --git a/src/session/session_api.c b/src/session/session_api.c index d7fdc12fc3c..c89e1999ef8 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -1102,7 +1102,6 @@ int __wt_session_range_truncate(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *start, WT_CURSOR *stop) { - WT_CURSOR *cursor; WT_DECL_RET; int cmp; bool local_start; @@ -1131,12 +1130,13 @@ __wt_session_range_truncate(WT_SESSION_IMPL *session, } /* - * Cursor truncate is only supported for some objects, check for the - * supporting methods we need, range_truncate and compare. + * Cursor truncate is only supported for some objects, check for a + * supporting compare method. */ - cursor = start == NULL ? stop : start; - if (cursor->compare == NULL) - WT_ERR(__wt_bad_object_type(session, cursor->uri)); + if (start != NULL && start->compare == NULL) + WT_ERR(__wt_bad_object_type(session, start->uri)); + if (stop != NULL && stop->compare == NULL) + WT_ERR(__wt_bad_object_type(session, stop->uri)); /* * If both cursors set, check they're correctly ordered with respect to @@ -1147,6 +1147,9 @@ __wt_session_range_truncate(WT_SESSION_IMPL *session, * reference the same object and the keys are set. */ if (start != NULL && stop != NULL) { + /* quiet clang scan-build */ + WT_ASSERT(session, start->compare != NULL); + WT_ERR(start->compare(start, stop, &cmp)); if (cmp > 0) WT_ERR_MSG(session, EINVAL, -- cgit v1.2.1 From e7d1e7f3ed1362f38b64ec7a9a5fcbe883b8d771 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 24 Apr 2017 16:40:21 -0400 Subject: WT-3300 Coverity 1374542: Dereference after null check (#3401) False positive, but explicitly checking the stop variable should make the complaint go away. 
--- src/session/session_api.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/session/session_api.c b/src/session/session_api.c index c89e1999ef8..592d6835809 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -1185,8 +1185,11 @@ __wt_session_range_truncate(WT_SESSION_IMPL *session, * data structures can move through pages faster forward than backward. * If we don't have a start cursor, create one and position it at the * first record. + * + * If start is NULL, stop must not be NULL, but static analyzers have + * a hard time with that, test explicitly. */ - if (start == NULL) { + if (start == NULL && stop != NULL) { WT_ERR(__session_open_cursor( (WT_SESSION *)session, stop->uri, NULL, NULL, &start)); local_start = true; -- cgit v1.2.1 From 3f02e205906c487376a04cd936888398913161c4 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Tue, 25 Apr 2017 12:02:45 -0400 Subject: SERVER-28820 Add a few error path messages in logging. (#3402) --- src/log/log.c | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/src/log/log.c b/src/log/log.c index 3c37e1eb326..c8ba7366f3f 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -1624,10 +1624,40 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, WT_RET_MSG(session, WT_ERROR, "choose either a start LSN or a start flag"); - /* Offsets must be on allocation boundaries. */ - if (lsnp->l.offset % allocsize != 0 || - lsnp->l.file > log->fileid) - return (WT_NOTFOUND); + /* + * Offsets must be on allocation boundaries. + * An invalid LSN from a user should just return + * WT_NOTFOUND. It is not an error. But if it is + * from recovery, we expect valid LSNs so give more + * information about that. 
+ */ + if (lsnp->l.offset % allocsize != 0) { + if (LF_ISSET(WT_LOGSCAN_RECOVER)) + WT_RET_MSG(session, WT_NOTFOUND, + "__wt_log_scan unaligned LSN %" + PRIu32 "/%" PRIu32, + lsnp->l.file, lsnp->l.offset); + else + return (WT_NOTFOUND); + } + /* + * If the file is in the future it doesn't exist. + * An invalid LSN from a user should just return + * WT_NOTFOUND. It is not an error. But if it is + * from recovery, we expect valid LSNs so give more + * information about that. + */ + if (lsnp->l.file > log->fileid) { + if (LF_ISSET(WT_LOGSCAN_RECOVER)) + WT_RET_MSG(session, WT_NOTFOUND, + "__wt_log_scan LSN %" PRIu32 "/%" + PRIu32 + " larger than biggest log file %" + PRIu32, lsnp->l.file, + lsnp->l.offset, log->fileid); + else + return (WT_NOTFOUND); + } /* * Log cursors may not know the starting LSN. If an -- cgit v1.2.1 From 52c1dbed31b2832c4b183432478a1caa336b9665 Mon Sep 17 00:00:00 2001 From: Sulabh Mahajan Date: Fri, 28 Apr 2017 09:01:55 +1000 Subject: WT-3302 Introduce a thread struct with a member telling if thread created successfully (#3405) * Introduce a thread stuct with a member denoting if thread got created * Change windows thread create in accordance to new thread struct * Fix build error --- src/async/async_api.c | 11 +++-------- src/include/os_windows.h | 5 ++++- src/include/posix.h | 5 ++++- src/os_posix/os_thread.c | 16 ++++++++++++---- src/os_win/os_thread.c | 15 +++++++++++---- src/support/thread_group.c | 2 +- 6 files changed, 35 insertions(+), 19 deletions(-) diff --git a/src/async/async_api.c b/src/async/async_api.c index e4943e61ed4..1e4bfd51c46 100644 --- a/src/async/async_api.c +++ b/src/async/async_api.c @@ -395,13 +395,12 @@ __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) * Join any worker we're stopping. * After the thread is stopped, close its session. 
*/ - WT_ASSERT(session, async->worker_tids[i] != 0); + WT_ASSERT(session, async->worker_tids[i].created); WT_ASSERT(session, async->worker_sessions[i] != NULL); F_CLR(async->worker_sessions[i], WT_SESSION_SERVER_ASYNC); WT_TRET(__wt_thread_join( session, async->worker_tids[i])); - async->worker_tids[i] = 0; wt_session = &async->worker_sessions[i]->iface; WT_TRET(wt_session->close(wt_session, NULL)); async->worker_sessions[i] = NULL; @@ -435,11 +434,7 @@ __wt_async_destroy(WT_SESSION_IMPL *session) F_CLR(conn, WT_CONN_SERVER_ASYNC); for (i = 0; i < conn->async_workers; i++) - if (async->worker_tids[i] != 0) { - WT_TRET(__wt_thread_join( - session, async->worker_tids[i])); - async->worker_tids[i] = 0; - } + WT_TRET(__wt_thread_join(session, async->worker_tids[i])); WT_TRET(__wt_cond_destroy(session, &async->flush_cond)); /* Close the server threads' sessions. */ @@ -497,7 +492,7 @@ __wt_async_flush(WT_SESSION_IMPL *session) */ workers = 0; for (i = 0; i < conn->async_workers; ++i) - if (async->worker_tids[i] != 0) + if (async->worker_tids[i].created) ++workers; if (workers == 0) return (0); diff --git a/src/include/os_windows.h b/src/include/os_windows.h index 764ade9328c..78a359e65fd 100644 --- a/src/include/os_windows.h +++ b/src/include/os_windows.h @@ -12,7 +12,10 @@ */ typedef CONDITION_VARIABLE wt_cond_t; typedef CRITICAL_SECTION wt_mutex_t; -typedef HANDLE wt_thread_t; +typedef struct { + bool created; + HANDLE id; +} wt_thread_t; /* * Thread callbacks need to match the return signature of _beginthreadex. 
diff --git a/src/include/posix.h b/src/include/posix.h index aaa88cde4be..23a4d178e98 100644 --- a/src/include/posix.h +++ b/src/include/posix.h @@ -25,7 +25,10 @@ */ typedef pthread_cond_t wt_cond_t; typedef pthread_mutex_t wt_mutex_t; -typedef pthread_t wt_thread_t; +typedef struct { + bool created; + pthread_t id; +} wt_thread_t; /* * Thread callbacks need to match the platform specific callback types diff --git a/src/os_posix/os_thread.c b/src/os_posix/os_thread.c index d03d0203de6..dfcf297c239 100644 --- a/src/os_posix/os_thread.c +++ b/src/os_posix/os_thread.c @@ -26,9 +26,11 @@ __wt_thread_create(WT_SESSION_IMPL *session, WT_FULL_BARRIER(); /* Spawn a new thread of control. */ - WT_SYSCALL_RETRY(pthread_create(tidret, NULL, func, arg), ret); - if (ret == 0) + WT_SYSCALL_RETRY(pthread_create(&tidret->id, NULL, func, arg), ret); + if (ret == 0) { + tidret->created = true; return (0); + } WT_RET_MSG(session, ret, "pthread_create"); } @@ -41,6 +43,10 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) { WT_DECL_RET; + /* Only attempt to join if thread was created successfully */ + if (!tid.created) + return (0); + /* * Joining a thread isn't a memory barrier, but WiredTiger commonly * sets flags and or state and then expects worker threads to halt. @@ -48,9 +54,11 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) */ WT_FULL_BARRIER(); - WT_SYSCALL(pthread_join(tid, NULL), ret); - if (ret == 0) + WT_SYSCALL(pthread_join(tid.id, NULL), ret); + if (ret == 0) { + tid.created = false; return (0); + } WT_RET_MSG(session, ret, "pthread_join"); } diff --git a/src/os_win/os_thread.c b/src/os_win/os_thread.c index e1b30b770cd..1ecf53e382e 100644 --- a/src/os_win/os_thread.c +++ b/src/os_win/os_thread.c @@ -24,9 +24,11 @@ __wt_thread_create(WT_SESSION_IMPL *session, WT_FULL_BARRIER(); /* Spawn a new thread of control. 
*/ - *tidret = (HANDLE)_beginthreadex(NULL, 0, func, arg, 0, NULL); - if (*tidret != 0) + tidret->id = (HANDLE)_beginthreadex(NULL, 0, func, arg, 0, NULL); + if (tidret->id != 0) { + tidret->created = true; return (0); + } WT_RET_MSG(session, __wt_errno(), "thread create: _beginthreadex"); } @@ -40,6 +42,10 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) { DWORD windows_error; + /* Only attempt to join if thread was created successfully */ + if (!tid.created) + return (0); + /* * Joining a thread isn't a memory barrier, but WiredTiger commonly * sets flags and or state and then expects worker threads to halt. @@ -48,7 +54,7 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_FULL_BARRIER(); if ((windows_error = - WaitForSingleObject(tid, INFINITE)) != WAIT_OBJECT_0) { + WaitForSingleObject(tid.id, INFINITE)) != WAIT_OBJECT_0) { if (windows_error == WAIT_FAILED) windows_error = __wt_getlasterror(); __wt_errx(session, "thread join: WaitForSingleObject: %s", @@ -58,13 +64,14 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) return (WT_PANIC); } - if (CloseHandle(tid) == 0) { + if (CloseHandle(tid.id) == 0) { windows_error = __wt_getlasterror(); __wt_errx(session, "thread join: CloseHandle: %s", __wt_formatmessage(session, windows_error)); return (__wt_map_windows_error(windows_error)); } + tid.created = false; return (0); } diff --git a/src/support/thread_group.c b/src/support/thread_group.c index 84c836e5627..5abc3d28cc0 100644 --- a/src/support/thread_group.c +++ b/src/support/thread_group.c @@ -79,7 +79,7 @@ __thread_group_shrink( if (thread == NULL) continue; - WT_ASSERT(session, thread->tid != 0); + WT_ASSERT(session, thread->tid.created); __wt_verbose(session, WT_VERB_THREAD_GROUP, "Stopping utility thread: %p:%" PRIu32, (void *)group, thread->id); -- cgit v1.2.1 From 148cef6f4cd887cf39cd6cd811a677236e7e312b Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 28 Apr 2017 00:15:24 -0400 Subject: WT-3297 support the 
gcc/clang -fvisibility=hidden flag (#3404) --- dist/s_string.ok | 1 + src/docs/programming.dox | 15 +++++----- src/docs/spell.ok | 1 + src/docs/tune-build-options.dox | 9 ++++++ src/include/gcc.h | 2 +- src/include/lint.h | 1 + src/include/msvc.h | 4 +-- src/include/wiredtiger.in | 64 +++++++++++++++++++++++++++++------------ src/os_common/os_getopt.c | 8 ++++-- 9 files changed, 73 insertions(+), 32 deletions(-) create mode 100644 src/docs/tune-build-options.dox diff --git a/dist/s_string.ok b/dist/s_string.ok index 7f8234d007a..ce4e9f963b0 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -733,6 +733,7 @@ fsyncLock fsyncs ftruncate func +fvisibility gcc gdb ge diff --git a/src/docs/programming.dox b/src/docs/programming.dox index aa76bef4614..205e7544c6c 100644 --- a/src/docs/programming.dox +++ b/src/docs/programming.dox @@ -65,19 +65,20 @@ each of which is ordered by one or more columns. - @subpage_single wtperf - @subpage_single wtstats

-- @subpage_single tune_memory_allocator -- @subpage_single tune_page_size_and_comp -- @subpage_single tune_cache +- @subpage_single tune_build_options - @subpage_single tune_bulk_load +- @subpage_single tune_cache +- @subpage_single tune_checksum +- @subpage_single tune_close - @subpage_single tune_cursor_persist -- @subpage_single tune_read_only - @subpage_single tune_durability -- @subpage_single tune_checksum - @subpage_single tune_file_alloc +- @subpage_single tune_memory_allocator +- @subpage_single tune_mutex +- @subpage_single tune_page_size_and_comp +- @subpage_single tune_read_only - @subpage_single tune_system_buffer_cache - @subpage_single tune_transparent_huge_pages -- @subpage_single tune_close -- @subpage_single tune_mutex - @subpage_single tune_zone_reclaim */ diff --git a/src/docs/spell.ok b/src/docs/spell.ok index bc2e16b1122..5d629f4c49f 100644 --- a/src/docs/spell.ok +++ b/src/docs/spell.ok @@ -237,6 +237,7 @@ fput freelist fsync ftruncate +fvisibility gcc gdbm ge diff --git a/src/docs/tune-build-options.dox b/src/docs/tune-build-options.dox new file mode 100644 index 00000000000..79cd60b1105 --- /dev/null +++ b/src/docs/tune-build-options.dox @@ -0,0 +1,9 @@ +/*! @page tune_build_options gcc/clang build options + +WiredTiger can be built using the gcc/clang \c -fvisibility=hidden flag, +which may significantly reduce the size and load time of the WiredTiger +library when built as a dynamic shared object, and allow the optimizer +to produce better code (for example, by eliminating most lookups in the +procedure linkage table). + + */ diff --git a/src/include/gcc.h b/src/include/gcc.h index 684d093bbbc..21eaaaef049 100644 --- a/src/include/gcc.h +++ b/src/include/gcc.h @@ -9,7 +9,7 @@ #define WT_PTRDIFFT_FMT "td" /* ptrdiff_t format string */ #define WT_SIZET_FMT "zu" /* size_t format string */ -/* Add GCC-specific attributes to types and function declarations. */ +/* GCC-specific attributes. 
*/ #define WT_PACKED_STRUCT_BEGIN(name) \ struct __attribute__ ((__packed__)) name { #define WT_PACKED_STRUCT_END \ diff --git a/src/include/lint.h b/src/include/lint.h index 82474b68d11..97b91c4c061 100644 --- a/src/include/lint.h +++ b/src/include/lint.h @@ -9,6 +9,7 @@ #define WT_PTRDIFFT_FMT "td" /* ptrdiff_t format string */ #define WT_SIZET_FMT "zu" /* size_t format string */ +/* Lint-specific attributes. */ #define WT_PACKED_STRUCT_BEGIN(name) \ struct name { #define WT_PACKED_STRUCT_END \ diff --git a/src/include/msvc.h b/src/include/msvc.h index 74a81296dfb..f1fab2add9e 100644 --- a/src/include/msvc.h +++ b/src/include/msvc.h @@ -16,9 +16,7 @@ #define WT_PTRDIFFT_FMT "Id" /* ptrdiff_t format string */ #define WT_SIZET_FMT "Iu" /* size_t format string */ -/* - * Add MSVC-specific attributes and pragmas to types and function declarations. - */ +/* MSVC-specific attributes. */ #define WT_PACKED_STRUCT_BEGIN(name) \ __pragma(pack(push,1)) \ struct name { diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 20db139ff8e..e38c41baccd 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -39,6 +39,16 @@ extern "C" { #define __F(func) (*(func)) #endif +/* + * We support configuring WiredTiger with the gcc/clang -fvisibility=hidden + * flags, but that requires public APIs be specifically marked. + */ +#if defined(DOXYGEN) || defined(SWIG) || !defined(__GNUC__) +#define WT_ATTRIBUTE_LIBRARY_VISIBLE +#else +#define WT_ATTRIBUTE_LIBRARY_VISIBLE __attribute__((visibility("default"))) +#endif + #ifdef SWIG %{ #include @@ -2570,7 +2580,7 @@ struct __wt_connection { */ int wiredtiger_open(const char *home, WT_EVENT_HANDLER *errhandler, const char *config, - WT_CONNECTION **connectionp); + WT_CONNECTION **connectionp) WT_ATTRIBUTE_LIBRARY_VISIBLE; /*! 
* Return information about a WiredTiger error as a string (see @@ -2581,7 +2591,7 @@ int wiredtiger_open(const char *home, * @param error a return value from a WiredTiger, ISO C, or POSIX standard API * @returns a string representation of the error */ -const char *wiredtiger_strerror(int error); +const char *wiredtiger_strerror(int error) WT_ATTRIBUTE_LIBRARY_VISIBLE; #if !defined(SWIG) /*! @@ -2718,7 +2728,8 @@ struct __wt_event_handler { * @errors */ int wiredtiger_struct_pack(WT_SESSION *session, - void *buffer, size_t size, const char *format, ...); + void *buffer, size_t size, const char *format, ...) + WT_ATTRIBUTE_LIBRARY_VISIBLE; /*! * Calculate the size required to pack a structure. @@ -2736,7 +2747,7 @@ int wiredtiger_struct_pack(WT_SESSION *session, * @errors */ int wiredtiger_struct_size(WT_SESSION *session, - size_t *sizep, const char *format, ...); + size_t *sizep, const char *format, ...) WT_ATTRIBUTE_LIBRARY_VISIBLE; /*! * Unpack a structure from a buffer. @@ -2753,7 +2764,8 @@ int wiredtiger_struct_size(WT_SESSION *session, * @errors */ int wiredtiger_struct_unpack(WT_SESSION *session, - const void *buffer, size_t size, const char *format, ...); + const void *buffer, size_t size, const char *format, ...) + WT_ATTRIBUTE_LIBRARY_VISIBLE; #if !defined(SWIG) @@ -2780,7 +2792,8 @@ typedef struct __wt_pack_stream WT_PACK_STREAM; * @errors */ int wiredtiger_pack_start(WT_SESSION *session, - const char *format, void *buffer, size_t size, WT_PACK_STREAM **psp); + const char *format, void *buffer, size_t size, WT_PACK_STREAM **psp) + WT_ATTRIBUTE_LIBRARY_VISIBLE; /*! * Start an unpacking operation from a buffer with the given format string. @@ -2796,7 +2809,8 @@ int wiredtiger_pack_start(WT_SESSION *session, * @errors */ int wiredtiger_unpack_start(WT_SESSION *session, - const char *format, const void *buffer, size_t size, WT_PACK_STREAM **psp); + const char *format, const void *buffer, size_t size, WT_PACK_STREAM **psp) + WT_ATTRIBUTE_LIBRARY_VISIBLE; /*! 
* Close a packing stream. @@ -2805,7 +2819,8 @@ int wiredtiger_unpack_start(WT_SESSION *session, * @param[out] usedp the number of bytes in the buffer used by the stream * @errors */ -int wiredtiger_pack_close(WT_PACK_STREAM *ps, size_t *usedp); +int wiredtiger_pack_close(WT_PACK_STREAM *ps, size_t *usedp) + WT_ATTRIBUTE_LIBRARY_VISIBLE; /*! * Pack an item into a packing stream. @@ -2814,7 +2829,8 @@ int wiredtiger_pack_close(WT_PACK_STREAM *ps, size_t *usedp); * @param item an item to pack * @errors */ -int wiredtiger_pack_item(WT_PACK_STREAM *ps, WT_ITEM *item); +int wiredtiger_pack_item(WT_PACK_STREAM *ps, WT_ITEM *item) + WT_ATTRIBUTE_LIBRARY_VISIBLE; /*! * Pack a signed integer into a packing stream. @@ -2823,7 +2839,8 @@ int wiredtiger_pack_item(WT_PACK_STREAM *ps, WT_ITEM *item); * @param i a signed integer to pack * @errors */ -int wiredtiger_pack_int(WT_PACK_STREAM *ps, int64_t i); +int wiredtiger_pack_int(WT_PACK_STREAM *ps, int64_t i) + WT_ATTRIBUTE_LIBRARY_VISIBLE; /*! * Pack a string into a packing stream. @@ -2832,7 +2849,8 @@ int wiredtiger_pack_int(WT_PACK_STREAM *ps, int64_t i); * @param s a string to pack * @errors */ -int wiredtiger_pack_str(WT_PACK_STREAM *ps, const char *s); +int wiredtiger_pack_str(WT_PACK_STREAM *ps, const char *s) + WT_ATTRIBUTE_LIBRARY_VISIBLE; /*! * Pack an unsigned integer into a packing stream. @@ -2841,7 +2859,8 @@ int wiredtiger_pack_str(WT_PACK_STREAM *ps, const char *s); * @param u an unsigned integer to pack * @errors */ -int wiredtiger_pack_uint(WT_PACK_STREAM *ps, uint64_t u); +int wiredtiger_pack_uint(WT_PACK_STREAM *ps, uint64_t u) + WT_ATTRIBUTE_LIBRARY_VISIBLE; /*! * Unpack an item from a packing stream. @@ -2850,7 +2869,8 @@ int wiredtiger_pack_uint(WT_PACK_STREAM *ps, uint64_t u); * @param item an item to unpack * @errors */ -int wiredtiger_unpack_item(WT_PACK_STREAM *ps, WT_ITEM *item); +int wiredtiger_unpack_item(WT_PACK_STREAM *ps, WT_ITEM *item) + WT_ATTRIBUTE_LIBRARY_VISIBLE; /*! 
* Unpack a signed integer from a packing stream. @@ -2859,7 +2879,8 @@ int wiredtiger_unpack_item(WT_PACK_STREAM *ps, WT_ITEM *item); * @param[out] ip the unpacked signed integer * @errors */ -int wiredtiger_unpack_int(WT_PACK_STREAM *ps, int64_t *ip); +int wiredtiger_unpack_int(WT_PACK_STREAM *ps, int64_t *ip) + WT_ATTRIBUTE_LIBRARY_VISIBLE; /*! * Unpack a string from a packing stream. @@ -2868,7 +2889,8 @@ int wiredtiger_unpack_int(WT_PACK_STREAM *ps, int64_t *ip); * @param[out] sp the unpacked string * @errors */ -int wiredtiger_unpack_str(WT_PACK_STREAM *ps, const char **sp); +int wiredtiger_unpack_str(WT_PACK_STREAM *ps, const char **sp) + WT_ATTRIBUTE_LIBRARY_VISIBLE; /*! * Unpack an unsigned integer from a packing stream. @@ -2877,7 +2899,8 @@ int wiredtiger_unpack_str(WT_PACK_STREAM *ps, const char **sp); * @param[out] up the unpacked unsigned integer * @errors */ -int wiredtiger_unpack_uint(WT_PACK_STREAM *ps, uint64_t *up); +int wiredtiger_unpack_uint(WT_PACK_STREAM *ps, uint64_t *up) + WT_ATTRIBUTE_LIBRARY_VISIBLE; /*! @} */ /*! @@ -2955,7 +2978,8 @@ struct __wt_config_item { * @snippet ex_all.c Validate a configuration string */ int wiredtiger_config_validate(WT_SESSION *session, - WT_EVENT_HANDLER *errhandler, const char *name, const char *config); + WT_EVENT_HANDLER *errhandler, const char *name, const char *config) + WT_ATTRIBUTE_LIBRARY_VISIBLE; #endif /*! @@ -2975,7 +2999,8 @@ int wiredtiger_config_validate(WT_SESSION *session, * @snippet ex_config_parse.c Create a configuration parser */ int wiredtiger_config_parser_open(WT_SESSION *session, - const char *config, size_t len, WT_CONFIG_PARSER **config_parserp); + const char *config, size_t len, WT_CONFIG_PARSER **config_parserp) + WT_ATTRIBUTE_LIBRARY_VISIBLE; /*! 
* A handle that can be used to search and traverse configuration strings @@ -3064,7 +3089,8 @@ struct __wt_config_parser { * @param patchp a location where the patch version number is returned * @returns a string representation of the version */ -const char *wiredtiger_version(int *majorp, int *minorp, int *patchp); +const char *wiredtiger_version(int *majorp, int *minorp, int *patchp) + WT_ATTRIBUTE_LIBRARY_VISIBLE; /******************************************* * Error returns diff --git a/src/os_common/os_getopt.c b/src/os_common/os_getopt.c index 26b3b11a1f7..ca516ca62e5 100644 --- a/src/os_common/os_getopt.c +++ b/src/os_common/os_getopt.c @@ -59,13 +59,17 @@ #include "wt_internal.h" -extern int __wt_opterr, __wt_optind, __wt_optopt, __wt_optreset; +extern int __wt_opterr WT_ATTRIBUTE_LIBRARY_VISIBLE; +extern int __wt_optind WT_ATTRIBUTE_LIBRARY_VISIBLE; +extern int __wt_optopt WT_ATTRIBUTE_LIBRARY_VISIBLE; +extern int __wt_optreset WT_ATTRIBUTE_LIBRARY_VISIBLE; + int __wt_opterr = 1, /* if error message should be printed */ __wt_optind = 1, /* index into parent argv vector */ __wt_optopt, /* character checked for validity */ __wt_optreset; /* reset getopt */ -extern char *__wt_optarg; +extern char *__wt_optarg WT_ATTRIBUTE_LIBRARY_VISIBLE; char *__wt_optarg; /* argument associated with option */ #define BADCH (int)'?' -- cgit v1.2.1 From b9bfe41b26db9acb53d37f5919b7d2bbe29db275 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 28 Apr 2017 00:23:22 -0400 Subject: WT-3296 LAS table fixes/improvements (#3397) * Changes for SERVER-28166: Assess effects of pinning a lot of content in storage engine cache Instead of counting the update chains we skip/don't-skip, calculate the memory held in the update chain. Then if there are skipped updates in the chain, count that memory as memory we won't get back if we rewrite the page, else count it as memory we will recover. 
Change the test for rewriting a page in memory from a percentage of the update chains that we skipped/didn't-skip to whether or not we'll recover at least 2KB from rewriting the page. This change should avoid rewriting pages in memory where we're just wasting time, that is, tiny pages or pages without any updates on them. Reconciliation has a test in __rec_write_check_complete() of whether an update-restore page reconciliation should fail because there aren't enough updates being discarded that it's worth rewriting the page in memory. That test incorrectly used the count of currently allocated boundary structures in its calculations, not the count of boundary structures used in a specific reconciliation. That bug led to eviction repeatedly doing update-restore reconciliation and rewriting pages in memory, to little or no gain, instead of switching to the LAS table. Further, the test was intended to be if 10% of the update chains won't have to be re-instantiated when the page is rewritten. I think the test was wrong, but regardless, implementing the test correctly doesn't help with the test load I'm using. (My test is creating a snapshot and then inserting records until the cache is full.) Re-instantiating the page in memory stalls in that test case, which makes sense because rewriting a page in memory isn't evicting anything, it's just shrinking the memory being pinned down. Change the test: if we can discard a chunk of the original page, rewrite the page in memory. Else, fallback to the lookaside table if we think lookaside will work. * Try to shorten update chains during eviction. * The memory in the update chain should include memory held by aborted transactions. Rather than put another test inside the loop, split the loop into two parts, eviction and checkpoint. Checkpoint doesn't need to calculate the memory in the update chain, nor does it have to track the minimum transaction ID. * Add a smoke test for the LAS table. 
* Skip reserved items when inserting update records into the LAS table, they're never restored. * Don't fall back to the LAS table when configured for in-memory eviction. * Replace __wt_evict's test of WT_EVICT_IN_MEMORY in with a test of WT_CONN_IN_MEMORY, there's no reason to further complicate the pass-back of the flags value to that function. * Ignore lookaside table collision tests and checks to avoid rewriting pages in memory for little gain, when configured in-memory. The former isn't an issue for in-memory configurations, and small caches with in-memory configurations can force us to rewrite every possible page. --- src/btree/bt_handle.c | 6 +- src/evict/evict_page.c | 18 +++--- src/include/btree.h | 10 ++-- src/reconcile/rec_write.c | 138 ++++++++++++++++++++++++++++++++------------- test/suite/test_inmem01.py | 3 + test/suite/test_las.py | 60 ++++++++++++++++++++ 6 files changed, 183 insertions(+), 52 deletions(-) create mode 100644 test/suite/test_las.py diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 40e7a601d47..687a77aaa65 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -447,9 +447,11 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) __wt_rwlock_init(session, &btree->ovfl_lock); WT_RET(__wt_spin_init(session, &btree->flush_lock, "btree flush")); - btree->checkpointing = WT_CKPT_OFF; /* Not checkpointing */ btree->modified = false; /* Clean */ - btree->write_gen = ckpt->write_gen; /* Write generation */ + + btree->checkpointing = WT_CKPT_OFF; /* Not checkpointing */ + btree->write_gen = ckpt->write_gen; /* Write generation */ + btree->checkpoint_gen = __wt_gen(session, WT_GEN_CHECKPOINT); return (0); } diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index edcd108e7e4..80aba818153 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -159,7 +159,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) /* Update the reference and discard the page. 
*/ if (__wt_ref_is_root(ref)) __wt_ref_out(session, ref); - else if ((clean_page && !LF_ISSET(WT_EVICT_IN_MEMORY)) || tree_dead) + else if ((clean_page && !F_ISSET(conn, WT_CONN_IN_MEMORY)) || tree_dead) /* * Pages that belong to dead trees never write back to disk * and can't support page splits. @@ -208,8 +208,8 @@ __evict_delete_ref(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) return (0); /* - * Avoid doing reverse splits when closing the file, it is - * wasted work and some structure may already have been freed. + * Avoid doing reverse splits when closing the file, it is wasted work + * and some structures may have already been freed. */ if (!closing) { parent = ref->home; @@ -399,11 +399,13 @@ __evict_review( WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *flagsp, bool closing) { WT_CACHE *cache; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_PAGE *page; uint32_t flags; bool lookaside_retry, modified; + conn = S2C(session); flags = WT_EVICTING; *flagsp = flags; @@ -459,7 +461,7 @@ __evict_review( * Clean pages can't be evicted when running in memory only. This * should be uncommon - we don't add clean pages to the queue. */ - if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY) && !modified && !closing) + if (F_ISSET(conn, WT_CONN_IN_MEMORY) && !modified && !closing) return (EBUSY); /* Check if the page can be evicted. */ @@ -521,11 +523,11 @@ __evict_review( * Additionally, if we aren't trying to free space in the cache, scrub * the page and keep it in memory. */ - cache = S2C(session)->cache; + cache = conn->cache; if (closing) LF_SET(WT_VISIBILITY_ERR); else if (!WT_PAGE_IS_INTERNAL(page)) { - if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) + if (F_ISSET(conn, WT_CONN_IN_MEMORY)) LF_SET(WT_EVICT_IN_MEMORY | WT_EVICT_SCRUB | WT_EVICT_UPDATE_RESTORE); else { @@ -547,7 +549,9 @@ __evict_review( * lookaside table, allowing the eviction of pages we'd otherwise have * to retain in cache to support older readers. 
*/ - if (ret == EBUSY && __wt_cache_stuck(session) && lookaside_retry) { + if (ret == EBUSY && + !F_ISSET(conn, WT_CONN_IN_MEMORY) && + __wt_cache_stuck(session) && lookaside_retry) { LF_CLR(WT_EVICT_SCRUB | WT_EVICT_UPDATE_RESTORE); LF_SET(WT_EVICT_LOOKASIDE); ret = __wt_reconcile(session, ref, NULL, flags, NULL); diff --git a/src/include/btree.h b/src/include/btree.h index 8ce77b5ecd3..74c7871034e 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -134,9 +134,12 @@ struct __wt_btree { WT_BM *bm; /* Block manager reference */ u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */ - uint64_t checkpoint_gen; /* Checkpoint generation */ - uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ uint64_t write_gen; /* Write generation */ + uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ + uint64_t checkpoint_gen; /* Checkpoint generation */ + volatile enum { + WT_CKPT_OFF, WT_CKPT_PREPARE, WT_CKPT_RUNNING + } checkpointing; /* Checkpoint in progress */ uint64_t bytes_inmem; /* Cache bytes in memory. */ uint64_t bytes_dirty_intl; /* Bytes in dirty internal pages. */ @@ -151,9 +154,6 @@ struct __wt_btree { volatile uint32_t evict_busy; /* Count of threads in eviction */ int evict_start_type; /* Start position for eviction walk (see WT_EVICT_WALK_START). */ - volatile enum { - WT_CKPT_OFF, WT_CKPT_PREPARE, WT_CKPT_RUNNING - } checkpointing; /* Checkpoint in progress */ /* * We flush pages from the tree (in order to make checkpoint faster), diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 469968c6c4b..52a279b8c96 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -48,9 +48,8 @@ typedef struct { /* Track the page's maximum transaction ID. */ uint64_t max_txn; - /* Track if all updates were skipped. 
*/ - uint64_t update_cnt; - uint64_t update_skip_cnt; + uint64_t update_mem; /* Total update memory */ + uint64_t update_mem_skipped; /* Skipped update memory */ /* * When we can't mark the page clean (for example, checkpoint found some @@ -453,7 +452,7 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, * that's worth trying. The lookaside table doesn't help if we skipped * updates, it can only help with older readers preventing eviction. */ - if (lookaside_retryp != NULL && r->update_cnt == r->update_skip_cnt) + if (lookaside_retryp != NULL && r->update_mem_skipped == 0) *lookaside_retryp = true; /* Update statistics. */ @@ -557,7 +556,7 @@ __rec_las_checkpoint_test(WT_SESSION_IMPL *session, WT_RECONCILE *r) /* * __rec_write_check_complete -- - * Check that reconciliation should complete + * Check that reconciliation should complete. */ static int __rec_write_check_complete(WT_SESSION_IMPL *session, WT_RECONCILE *r) @@ -565,6 +564,16 @@ __rec_write_check_complete(WT_SESSION_IMPL *session, WT_RECONCILE *r) WT_BOUNDARY *bnd; size_t i; + /* + * Tests in this function are lookaside tests and tests to decide if + * rewriting a page in memory is worth doing. In-memory configurations + * can't use a lookaside table, and we ignore page rewrite desirability + * checks for in-memory eviction because a small cache can force us to + * rewrite every possible page. + */ + if (F_ISSET(r, WT_EVICT_IN_MEMORY)) + return (0); + /* * If we have used the lookaside table, check for a lookaside table and * checkpoint collision. @@ -573,16 +582,29 @@ __rec_write_check_complete(WT_SESSION_IMPL *session, WT_RECONCILE *r) return (EBUSY); /* - * If we are doing update/restore based eviction, confirm part of the - * page is being discarded, or at least 10% of the updates won't have - * to be re-instantiated. Otherwise, it isn't progress, don't bother. + * If doing update/restore based eviction, see if rewriting the page in + * memory is worth the effort. 
*/ if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE)) { - for (bnd = r->bnd, i = 0; i < r->bnd_entries; ++bnd, ++i) + /* If discarding a disk-page size chunk, do it. */ + for (bnd = r->bnd, i = 0; i < r->bnd_next; ++bnd, ++i) if (bnd->supd == NULL) - break; - if (i == r->bnd_entries && - r->update_cnt / 10 >= r->update_skip_cnt) + return (0); + + /* + * Switch to the lookaside table if we can: it's more effective + * than rewriting a page in memory because it implies eviction. + */ + if (r->update_mem_skipped == 0) + return (EBUSY); + + /* + * Don't rewrite pages where we're not going to get back enough + * memory to care. There's no empirical evidence the 2KB limit + * is a good configuration, but it should keep us from wasting + * time on tiny pages and pages with only a few updates. + */ + if (r->update_mem - r->update_mem_skipped < 2 * WT_KILOBYTE) return (EBUSY); } return (0); @@ -888,7 +910,7 @@ __rec_write_init(WT_SESSION_IMPL *session, r->max_txn = WT_TXN_NONE; /* Track if all updates were skipped. */ - r->update_cnt = r->update_skip_cnt = 0; + r->update_mem = r->update_mem_skipped = 0; /* Track if the page can be marked clean. */ r->leave_dirty = false; @@ -1112,7 +1134,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_DECL_ITEM(tmp); WT_PAGE *page; WT_UPDATE *append, *upd, *upd_list; - size_t notused; + size_t notused, update_mem; uint64_t max_txn, min_txn, txnid; bool append_origv, skipped; @@ -1133,36 +1155,62 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, } else upd_list = ins->upd; - ++r->update_cnt; - for (skipped = false, - max_txn = WT_TXN_NONE, min_txn = UINT64_MAX, - upd = upd_list; upd != NULL; upd = upd->next) { - if ((txnid = upd->txnid) == WT_TXN_ABORTED) - continue; + skipped = false; + update_mem = 0; + max_txn = WT_TXN_NONE; + min_txn = UINT64_MAX; - /* Track the largest/smallest transaction IDs on the list. 
*/ - if (WT_TXNID_LT(max_txn, txnid)) - max_txn = txnid; - if (WT_TXNID_LT(txnid, min_txn)) - min_txn = txnid; + if (F_ISSET(r, WT_EVICTING)) { + /* Discard obsolete updates. */ + if ((upd = __wt_update_obsolete_check( + session, page, upd_list->next)) != NULL) + __wt_update_obsolete_free(session, page, upd); + + for (upd = upd_list; upd != NULL; upd = upd->next) { + /* Track the total memory in the update chain. */ + update_mem += WT_UPDATE_MEMSIZE(upd); + + if ((txnid = upd->txnid) == WT_TXN_ABORTED) + continue; + + /* + * Track the largest/smallest transaction IDs on the + * list. + */ + if (WT_TXNID_LT(max_txn, txnid)) + max_txn = txnid; + if (WT_TXNID_LT(txnid, min_txn)) + min_txn = txnid; - /* - * Find the first update we can use. - */ - if (F_ISSET(r, WT_EVICTING)) { /* + * Find the first update we can use. + * * Eviction can write any committed update. * * When reconciling for eviction, track whether any * uncommitted updates are found. + * + * When reconciling for eviction, track the memory held + * by the update chain. */ if (__wt_txn_committed(session, txnid)) { if (*updp == NULL) *updp = upd; } else skipped = true; - } else { + } + } else + for (upd = upd_list; upd != NULL; upd = upd->next) { + if ((txnid = upd->txnid) == WT_TXN_ABORTED) + continue; + + /* Track the largest transaction ID on the list. */ + if (WT_TXNID_LT(max_txn, txnid)) + max_txn = txnid; + /* + * Find the first update we can use. + * * Checkpoint can only write updates visible as of its * snapshot. * @@ -1177,7 +1225,6 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, skipped = true; } } - } /* Reconciliation should never see a reserved update. */ WT_ASSERT(session, *updp == NULL || !WT_UPDATE_RESERVED_ISSET(*updp)); @@ -1227,12 +1274,6 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, txnid != S2C(session)->txn_global.checkpoint_txnid || WT_SESSION_IS_CHECKPOINT(session)); #endif - - /* - * Track how many update chains we saw vs. 
how many update - * chains had an entry we skipped. - */ - ++r->update_skip_cnt; return (0); } @@ -1276,6 +1317,23 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, if (skipped && !F_ISSET(r, WT_EVICT_UPDATE_RESTORE)) return (EBUSY); + /* + * Track the memory required by the update chain. + * + * A page with no uncommitted (skipped) updates, that can't be evicted + * because some updates aren't yet globally visible, can be evicted by + * writing previous versions of the updates to the lookaside file. That + * test is just checking if the skipped updates memory is zero. + * + * If that's not possible (there are skipped updates), we can rewrite + * the pages in-memory, but we don't want to unless there's memory to + * recover. That test is comparing the memory we'd recover to the memory + * we'd have to re-instantiate as part of the rewrite. + */ + r->update_mem += update_mem; + if (skipped) + r->update_mem_skipped += update_mem; + append_origv = false; if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE)) { /* @@ -3613,9 +3671,13 @@ __rec_update_las(WT_SESSION_IMPL *session, /* * Walk the list of updates, storing each key/value pair into - * the lookaside table. + * the lookaside table. Skipped reserved items, they're never + * restored, obviously. 
*/ do { + if (WT_UPDATE_RESERVED_ISSET(upd)) + continue; + cursor->set_key(cursor, btree_id, &las_addr, ++las_counter, list->onpage_txn, key); diff --git a/test/suite/test_inmem01.py b/test/suite/test_inmem01.py index 79a44d434d0..694bcabbe77 100644 --- a/test/suite/test_inmem01.py +++ b/test/suite/test_inmem01.py @@ -108,12 +108,15 @@ class test_inmem01(wttest.WiredTigerTestCase): cursor.reset() # Spin inserting to give eviction a chance to reclaim space + sleeps = 0 inserted = False for i in range(1, 1000): try: cursor[ds.key(1)] = ds.value(1) except wiredtiger.WiredTigerError: cursor.reset() + sleeps = sleeps + 1 + self.assertLess(sleeps, 60 * 5) sleep(1) continue inserted = True diff --git a/test/suite/test_las.py b/test/suite/test_las.py new file mode 100644 index 00000000000..d0bd1d108fa --- /dev/null +++ b/test/suite/test_las.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest +from wtdataset import SimpleDataSet + +# test_las.py +# Smoke tests to ensure lookaside tables are working. +class test_las(wttest.WiredTigerTestCase): + # Force a small cache. + def conn_config(self): + return 'cache_size=1GB' + + @wttest.longtest('lookaside table smoke test') + def test_las(self): + # Create a small table. + uri = "table:test_las" + nrows = 100 + ds = SimpleDataSet(self, uri, nrows, key_format="S") + ds.populate() + + # Take a snapshot. + self.session.snapshot("name=xxx") + + # Insert a large number of records, we'll hang if the lookaside table + # isn't doing its thing. + c = self.session.open_cursor(uri) + bigvalue = "abcde" * 100 + for i in range(1, 1000000): + c.set_key(ds.key(nrows + i)) + c.set_value(bigvalue) + self.assertEquals(c.insert(), 0) + +if __name__ == '__main__': + wttest.run() -- cgit v1.2.1 From 48c7cf59ccf583369fa98733b388983bd4abb70e Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Sat, 29 Apr 2017 01:24:50 +1000 Subject: WT-3142 Add a workload generator application. (#3255) The workload generator runs core workloads in C++, which directly calls the C API and thus minimizes the overhead of the test program. The workload setup is done in Python to allow for an expressive workload definition language. Initial commit has a number of example workloads that match equivalent wtperf workloads. 
--- bench/workgen/Makefile.am | 32 + bench/workgen/runner/example_simple.py | 31 + bench/workgen/runner/example_txn.py | 29 + bench/workgen/runner/insert_test.py | 94 ++ bench/workgen/runner/multi_btree_heavy_stress.py | 102 ++ bench/workgen/runner/runner/__init__.py | 92 ++ bench/workgen/runner/runner/core.py | 101 ++ bench/workgen/runner/runner/latency.py | 122 ++ bench/workgen/runner/small_btree.py | 27 + bench/workgen/setup.py | 70 + bench/workgen/workgen.cxx | 1605 ++++++++++++++++++++++ bench/workgen/workgen.h | 410 ++++++ bench/workgen/workgen.swig | 233 ++++ bench/workgen/workgen/__init__.py | 42 + bench/workgen/workgen_func.c | 86 ++ bench/workgen/workgen_func.h | 44 + bench/workgen/workgen_int.h | 205 +++ bench/workgen/workgen_time.h | 201 +++ build_posix/Make.subdirs | 1 + dist/s_string.ok | 4 + dist/s_whitespace | 1 + 21 files changed, 3532 insertions(+) create mode 100644 bench/workgen/Makefile.am create mode 100755 bench/workgen/runner/example_simple.py create mode 100644 bench/workgen/runner/example_txn.py create mode 100644 bench/workgen/runner/insert_test.py create mode 100644 bench/workgen/runner/multi_btree_heavy_stress.py create mode 100644 bench/workgen/runner/runner/__init__.py create mode 100644 bench/workgen/runner/runner/core.py create mode 100644 bench/workgen/runner/runner/latency.py create mode 100644 bench/workgen/runner/small_btree.py create mode 100644 bench/workgen/setup.py create mode 100644 bench/workgen/workgen.cxx create mode 100644 bench/workgen/workgen.h create mode 100644 bench/workgen/workgen.swig create mode 100644 bench/workgen/workgen/__init__.py create mode 100644 bench/workgen/workgen_func.c create mode 100644 bench/workgen/workgen_func.h create mode 100644 bench/workgen/workgen_int.h create mode 100644 bench/workgen/workgen_time.h diff --git a/bench/workgen/Makefile.am b/bench/workgen/Makefile.am new file mode 100644 index 00000000000..cfe8c940cee --- /dev/null +++ b/bench/workgen/Makefile.am @@ -0,0 +1,32 @@ 
+AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS += -I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility + +PYSRC = $(top_srcdir)/bench/workgen +PYDIRS = -t $(abs_builddir) -I $(abs_top_srcdir):$(abs_top_builddir) -L $(abs_top_builddir)/.libs +all-local: _workgen.so libworkgen.la +libworkgen_la_SOURCES = workgen.cxx workgen_func.c +noinst_LTLIBRARIES = libworkgen.la + +# We keep generated Python sources under bench/workgen. +$(PYSRC)/workgen_wrap.cxx: $(PYSRC)/workgen.h $(PYSRC)/workgen.swig + (cd $(PYSRC) && \ + $(SWIG) -c++ -python -threads -O -Wall -I$(abs_top_builddir) -outdir ./workgen workgen.swig) + +_workgen.so: $(top_builddir)/libwiredtiger.la $(PYSRC)/workgen_wrap.cxx libworkgen.la $(PYSRC)/workgen.h $(PYSRC)/workgen_time.h + (cd $(PYSRC) && \ + $(PYTHON) setup.py build_ext -f -b $(abs_builddir) $(PYDIRS)) + +install-exec-local: + (cd $(PYSRC) && \ + $(PYTHON) setup.py build_py -d $(abs_builddir)/build && \ + $(PYTHON) setup.py build_ext -f -b $(abs_builddir)/build $(PYDIRS) && \ + $(PYTHON) setup.py install_lib -b $(abs_builddir)/build --skip-build $(PYTHON_INSTALL_ARG)) + +# We build in different places for an install vs running from the tree: +# clean up both. Don't rely on "setup.py clean" -- everything that should +# be removed is created under the build directory. 
+clean-local: + rm -rf build _workgen.so workgen_wrap.o WT_TEST + +TESTS = run-ex_access diff --git a/bench/workgen/runner/example_simple.py b/bench/workgen/runner/example_simple.py new file mode 100755 index 00000000000..de944cbe29e --- /dev/null +++ b/bench/workgen/runner/example_simple.py @@ -0,0 +1,31 @@ +#!/usr/bin/python +from runner import * +from wiredtiger import * +from workgen import * + +def show(tname): + print('') + print('<><><><> ' + tname + ' <><><><>') + c = s.open_cursor(tname, None) + for k,v in c: + print('key: ' + k) + print('value: ' + v) + print('<><><><><><><><><><><><>') + c.close() + +context = Context() +conn = wiredtiger_open("WT_TEST", "create,cache_size=1G") +s = conn.open_session() +tname = 'table:simple' +s.create(tname, 'key_format=S,value_format=S') + +ops = Operation(Operation.OP_INSERT, Table(tname), Key(Key.KEYGEN_APPEND, 10), Value(40)) +thread = Thread(ops) +workload = Workload(context, thread) +workload.run(conn) +show(tname) + +thread = Thread(ops * 5) +workload = Workload(context, thread) +workload.run(conn) +show(tname) diff --git a/bench/workgen/runner/example_txn.py b/bench/workgen/runner/example_txn.py new file mode 100644 index 00000000000..ef1d7a93941 --- /dev/null +++ b/bench/workgen/runner/example_txn.py @@ -0,0 +1,29 @@ +#!/usr/bin/python +from runner import * +from wiredtiger import * +from workgen import * + +conn = wiredtiger_open("WT_TEST", "create,cache_size=500MB") +s = conn.open_session() +tname = "table:test" +s.create(tname, 'key_format=S,value_format=S') +table = Table(tname) +table.options.key_size = 20 +table.options.value_size = 100 + +context = Context() +op = Operation(Operation.OP_INSERT, table) +thread = Thread(op * 500000) +pop_workload = Workload(context, thread) +print('populate:') +pop_workload.run(conn) + +opread = Operation(Operation.OP_SEARCH, table) +opwrite = Operation(Operation.OP_INSERT, table) +treader = Thread(opread) +twriter = Thread(txn(opwrite * 2)) +workload = Workload(context, 
treader * 8 + twriter * 2) +workload.options.run_time = 10 +workload.options.report_interval = 5 +print('transactional write workload:') +workload.run(conn) diff --git a/bench/workgen/runner/insert_test.py b/bench/workgen/runner/insert_test.py new file mode 100644 index 00000000000..30f2818e91e --- /dev/null +++ b/bench/workgen/runner/insert_test.py @@ -0,0 +1,94 @@ +#!/usr/bin/python +from runner import * +from wiredtiger import * +from workgen import * + +def tablename(id): + return "table:test%06d" % id + +def show(tname): + print('') + print('<><><><> ' + tname + ' <><><><>') + c = s.open_cursor(tname, None) + for k,v in c: + print('key: ' + k) + print('value: ' + v) + print('<><><><><><><><><><><><>') + c.close() + +def expectException(expr): + gotit = False + try: + expr() + except BaseException as e: + print('got expected exception: ' + str(e)) + gotit = True + if not gotit: + raise Exception("missing expected exception") + +context = Context() +conn = wiredtiger_open("WT_TEST", "create,cache_size=1G") +s = conn.open_session() +tname0 = tablename(0) +tname1 = tablename(1) +s.create(tname0, 'key_format=S,value_format=S') +s.create(tname1, 'key_format=S,value_format=S') + +ops = Operation(Operation.OP_INSERT, Table(tname0), Key(Key.KEYGEN_APPEND, 10), Value(100)) +workload = Workload(context, Thread(ops)) + +print('RUN1') +workload.run(conn) +show(tname0) + +# The context has memory of how many keys are in all the tables. +# truncate goes behind context's back, but it doesn't matter for +# an insert-only test. 
+s.truncate(tname0, None, None) + +# Show how to 'multiply' operations +op = Operation(Operation.OP_INSERT, Table(tname0), Key(Key.KEYGEN_APPEND, 10), Value(100)) +op2 = Operation(Operation.OP_INSERT, Table(tname1), Key(Key.KEYGEN_APPEND, 20), Value(30)) +o = op2 * 10 +print 'op is: ' + str(op) +print 'multiplying op is: ' + str(o) +thread0 = Thread(o + op + op) +workload = Workload(context, thread0) +print('RUN2') +workload.run(conn) +show(tname0) +show(tname1) + +s.truncate(tname0, None, None) +s.truncate(tname1, None, None) + +# operations can be multiplied, added in any combination. +op += Operation(Operation.OP_INSERT, Table(tname0), Key(Key.KEYGEN_APPEND, 10), Value(10)) +op *= 2 +op += Operation(Operation.OP_INSERT, Table(tname0), Key(Key.KEYGEN_APPEND, 10), Value(10)) +thread0 = Thread(op * 10 + op2 * 20) +workload = Workload(context, thread0) +print('RUN3') +workload.run(conn) +show(tname0) +show(tname1) + +print('workload is ' + str(workload)) +print('thread0 is ' + str(thread0)) + +def assignit(k, n): + k._size = n + +expectException(lambda: Operation( + Operation.OP_INSERT, Table('foo'), Key(Key.KEYGEN_APPEND, 10))) +# we don't catch this exception here, but in Workload.run() +k = Key(Key.KEYGEN_APPEND, 1) +assignit(k, 30) +assignit(k, 1) # we don't catch this exception here, but in Workload.run() +op = Operation(Operation.OP_INSERT, Table(tname0), k, Value(10)) +workload = Workload(context, Thread(op)) +print('RUN4') +expectException(lambda: workload.run(conn)) + +print('HELP:') +print(workload.options.help()) diff --git a/bench/workgen/runner/multi_btree_heavy_stress.py b/bench/workgen/runner/multi_btree_heavy_stress.py new file mode 100644 index 00000000000..0993f60248d --- /dev/null +++ b/bench/workgen/runner/multi_btree_heavy_stress.py @@ -0,0 +1,102 @@ +#!/usr/bin/python +# Drive a constant high workload through, even if WiredTiger isn't keeping +# up by dividing the workload across a lot of threads. 
This needs to be +# tuned to the particular machine so the workload is close to capacity in the +# steady state, but not overwhelming. +# +################ +# Note: as a proof of concept for workgen, this matches closely +# bench/wtperf/runner/multi-btree-read-heavy-stress.wtperf . +# Run time, #ops, #threads are ratcheted way down for testing. +# +from runner import * +from wiredtiger import * +from workgen import * + +def op_append(ops, op): + if ops == None: + ops = op + else: + ops += op + return ops + +def make_op(optype, table, key, value = None): + if value == None: + return Operation(optype, table, key) + else: + return Operation(optype, table, key, value) + +logkey = Key(Key.KEYGEN_APPEND, 8) ## should be 8 bytes format 'Q' +def operations(optype, tables, key, value = None, ops_per_txn = 0, logtable = None): + txn_list = [] + ops = None + nops = 0 + for table in tables: + ops = op_append(ops, make_op(optype, table, key, value)) + if logtable != None: + ops = op_append(ops, make_op(optype, logtable, logkey, value)) + nops += 1 + if ops_per_txn > 0 and nops % ops_per_txn == 0: + txn_list.append(txn(ops)) + ops = None + if ops_per_txn > 0: + if ops != None: + txn_list.append(txn(ops)) + ops = None + for t in txn_list: + ops = op_append(ops, t) + return ops + +context = Context() +## cache_size=20GB +conn_config="create,cache_size=1GB,session_max=1000,eviction=(threads_min=4,threads_max=8),log=(enabled=false),transaction_sync=(enabled=false),checkpoint_sync=true,checkpoint=(wait=60),statistics=(fast),statistics_log=(json,wait=1)" +table_config="allocation_size=4k,memory_page_max=10MB,prefix_compression=false,split_pct=90,leaf_page_max=32k,internal_page_max=16k,type=file,block_compressor=snappy" +conn_config += extensions_config(['compressors/snappy']) +conn = wiredtiger_open("WT_TEST", conn_config) +s = conn.open_session() + +tables = [] +for i in range(0, 8): + tname = "table:test" + str(i) + s.create(tname, 'key_format=S,value_format=S,' + table_config) + 
tables.append(Table(tname)) +tname = "table:log" +# TODO: use table_config for the log file? +s.create(tname, 'key_format=S,value_format=S,' + table_config) +logtable = Table(tname) + +##icount=200000000 / 8 +icount=20000 +ins_ops = operations(Operation.OP_INSERT, tables, Key(Key.KEYGEN_APPEND, 20), Value(500)) +thread = Thread(ins_ops * icount) +pop_workload = Workload(context, thread) +print('populate:') +pop_workload.run(conn) + +ins_ops = operations(Operation.OP_INSERT, tables, Key(Key.KEYGEN_APPEND, 20), Value(500), 0, logtable) +upd_ops = operations(Operation.OP_UPDATE, tables, Key(Key.KEYGEN_UNIFORM, 20), Value(500), 0, logtable) +read_ops = operations(Operation.OP_SEARCH, tables, Key(Key.KEYGEN_UNIFORM, 20), None, 3) + +ins_thread = Thread(ins_ops) +upd_thread = Thread(upd_ops) +read_thread = Thread(read_ops) +ins_thread.options.throttle = 250 +ins_thread.options.name = "Insert" +upd_thread.options.throttle = 250 +upd_thread.options.name = "Update" +read_thread.options.throttle = 1000 +read_thread.options.name = "Read" +##threads = [ins_thread] * 10 + [upd_thread] * 10 + [read_thread] * 80 +threads = ins_thread * 1 + upd_thread * 1 + read_thread * 2 +workload = Workload(context, threads) +##workload.options.run_time = 3600 +workload.options.run_time = 30 +workload.options.report_interval = 1 +workload.options.sample_interval = 5 +workload.options.sample_rate = 1 +print('heavy stress workload:') +workload.run(conn) + +latency_filename = conn.get_home() + '/latency.out' +print('for latency output, see: ' + latency_filename) +latency.workload_latency(workload, latency_filename) diff --git a/bench/workgen/runner/runner/__init__.py b/bench/workgen/runner/runner/__init__.py new file mode 100644 index 00000000000..67b547bc51b --- /dev/null +++ b/bench/workgen/runner/runner/__init__.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. 
+# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# runner/__init__.py +# Used as a first import by runners, does any common initialization. +from __future__ import print_function + +import os, shutil, sys +thisdir = os.path.dirname(os.path.abspath(__file__)) +workgen_src = os.path.dirname(os.path.dirname(thisdir)) +wt_dir = os.path.dirname(os.path.dirname(workgen_src)) +wt_builddir = os.path.join(wt_dir, 'build_posix') + +def _prepend_env_path(pathvar, s): + last = '' + try: + last = ':' + os.environ[pathvar] + except: + pass + os.environ[pathvar] = s + last + +# Initialize the python path so needed modules can be imported. +# If the path already works, don't change it. 
+try: + import wiredtiger +except: + # We'll try hard to make the importing work, we'd like to runners + # to be executable directly without having to set environment variables. + sys.path.insert(0, os.path.join(wt_dir, 'lang', 'python')) + sys.path.insert(0, os.path.join(wt_builddir, 'lang', 'python')) + try: + import wiredtiger + except: + # If the .libs directory is not in our library search path, + # we need to set it and retry. However, the dynamic link + # library has already cached its value, our only option is + # to restart the Python interpreter. + if '_workgen_init' not in os.environ: + os.environ['_workgen_init'] = 'true' + dotlibs = os.path.join(wt_builddir, '.libs') + _prepend_env_path('LD_LIBRARY_PATH', dotlibs) + _prepend_env_path('DYLD_LIBRARY_PATH', dotlibs) + py_args = sys.argv + py_args.insert(0, sys.executable) + try: + os.execv(sys.executable, py_args) + except Exception, exception: + print('re-exec failed: ' + str(exception), file=sys.stderr) + print(' exec(' + sys.executable + ', ' + str(py_args) + ')') + print('Try adding "' + dotlibs + '" to the', file=sys.stderr) + print('LD_LIBRARY_PATH environment variable before running ' + \ + 'this program again.', file=sys.stderr) + sys.exit(1) + +try: + import workgen +except: + sys.path.insert(0, os.path.join(workgen_src, 'workgen')) + sys.path.insert(0, os.path.join(wt_builddir, 'bench', 'workgen')) + import workgen + +# Clear out the WT_TEST directory. +shutil.rmtree('WT_TEST', True) +os.mkdir('WT_TEST') + +from .core import txn, extensions_config +from .latency import workload_latency diff --git a/bench/workgen/runner/runner/core.py b/bench/workgen/runner/runner/core.py new file mode 100644 index 00000000000..a0f0d4d77cd --- /dev/null +++ b/bench/workgen/runner/runner/core.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. 
+# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# runner/core.py +# Core functions available to all runners +import glob, os +import workgen + +# txn -- +# Put the operation (and any suboperations) within a transaction. +def txn(op, config=None): + t = workgen.Transaction(config) + op._transaction = t + return op + +# Check for a local build that contains the wt utility. First check in +# current working directory, then in build_posix and finally in the disttop +# directory. This isn't ideal - if a user has multiple builds in a tree we +# could pick the wrong one. +def _wiredtiger_builddir(): + if os.path.isfile(os.path.join(os.getcwd(), 'wt')): + return os.getcwd() + + # The directory of this file should be within the distribution tree. 
+ thisdir = os.path.dirname(os.path.abspath(__file__)) + wt_disttop = os.path.join(\ + thisdir, os.pardir, os.pardir, os.pardir, os.pardir) + if os.path.isfile(os.path.join(wt_disttop, 'wt')): + return wt_disttop + if os.path.isfile(os.path.join(wt_disttop, 'build_posix', 'wt')): + return os.path.join(wt_disttop, 'build_posix') + if os.path.isfile(os.path.join(wt_disttop, 'wt.exe')): + return wt_disttop + raise Exception('Unable to find useable WiredTiger build') + +# Return the wiredtiger_open extension argument for any needed shared library. +# Called with a list of extensions, e.g. +# [ 'compressors/snappy', 'encryptors/rotn=config_string' ] +def extensions_config(exts): + result = '' + extfiles = {} + errpfx = 'extensions_config' + builddir = _wiredtiger_builddir() + for ext in exts: + extconf = '' + if '=' in ext: + splits = ext.split('=', 1) + ext = splits[0] + extconf = '=' + splits[1] + splits = ext.split('/') + if len(splits) != 2: + raise Exception(errpfx + ": " + ext + + ": extension is not named

/") + libname = splits[1] + dirname = splits[0] + pat = os.path.join(builddir, 'ext', + dirname, libname, '.libs', 'libwiredtiger_*.so') + filenames = glob.glob(pat) + if len(filenames) == 0: + raise Exception(errpfx + + ": " + ext + + ": no extensions library found matching: " + pat) + elif len(filenames) > 1: + raise Exception(errpfx + ": " + ext + + ": multiple extensions libraries found matching: " + pat) + complete = '"' + filenames[0] + '"' + extconf + if ext in extfiles: + if extfiles[ext] != complete: + raise Exception(errpfx + + ": non-matching extension arguments in " + + str(exts)) + else: + extfiles[ext] = complete + if len(extfiles) != 0: + result = ',extensions=[' + ','.join(extfiles.values()) + ']' + return result diff --git a/bench/workgen/runner/runner/latency.py b/bench/workgen/runner/runner/latency.py new file mode 100644 index 00000000000..46d9be9bad8 --- /dev/null +++ b/bench/workgen/runner/runner/latency.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# runner/latency.py +# Utility functions for showing latency statistics +from __future__ import print_function +import sys + +def _show_buckets(fh, title, mult, buckets, n): + shown = False + s = title + ': ' + for count in range(0, n): + val = buckets[count] + if val != 0: + if shown: + s += ',' + s += str(count*mult) + '=' + str(val) + shown = True + print(s, file=fh) + +def _latency_preprocess(arr, merge): + mx = 0 + cur = 0 + # SWIG arrays have a clunky interface + for i in range(0, arr.__len__()): + if i % merge == 0: + cur = 0 + cur += arr[i] + if cur > mx: + mx = cur + arr.height = mx + +def _latency_plot(box, ch, left, width, arr, merge, scale): + pos = 0 + for x in range(0, width): + t = 0 + for i in range(0, merge): + t += arr[pos] + pos += 1 + nch = scale * t + y = 0 + while nch > 0.0: + box[y][left + x] = ch + nch -= 1.0 + y += 1 + +def _latency_optype(fh, name, ch, t): + if t.ops == 0: + return + if t.latency_ops == 0: + print('**** ' + name + ' operations: ' + str(t.ops), file=fh) + return + print('**** ' + name + ' operations: ' + str(t.ops) + \ + ', latency operations: ' + str(t.latency_ops), file=fh) + print(' avg: ' + str(t.latency/t.latency_ops) + \ + ', min: ' + str(t.min_latency) + ', max: ' + str(t.max_latency), + file=fh) + us = t.us() + ms = t.ms() + sec = t.sec() + _latency_preprocess(us, 40) + _latency_preprocess(ms, 40) + _latency_preprocess(sec, 4) + max_height = max(us.height, ms.height, sec.height) + if max_height == 0: + return + height = 20 # 20 chars high + # a list of a 
list of characters + box = [list(' ' * 80) for x in range(height)] + scale = (1.0 / (max_height + 1)) * height + _latency_plot(box, ch, 0, 25, us, 40, scale) + _latency_plot(box, ch, 27, 25, ms, 40, scale) + _latency_plot(box, ch, 54, 25, sec, 4, scale) + box.reverse() + for line in box: + print(''.join(line), file=fh) + dash25 = '-' * 25 + print(' '.join([dash25] * 3), file=fh) + print(' 0 - 999 us (40/bucket) 1 - 999 ms (40/bucket) ' + \ + '1 - 99 sec (4/bucket)', file=fh) + print('', file=fh) + _show_buckets(fh, name + ' us', 1, us, 1000) + _show_buckets(fh, name + ' ms', 1000, ms, 1000) + _show_buckets(fh, name + ' sec', 1000000, sec, 100) + print('', file=fh) + +def workload_latency(workload, outfilename = None): + if outfilename: + fh = open(outfilename, 'w') + else: + fh = sys.stdout + _latency_optype(fh, 'insert', 'I', workload.stats.insert) + _latency_optype(fh, 'read', 'R', workload.stats.read) + _latency_optype(fh, 'remove', 'X', workload.stats.remove) + _latency_optype(fh, 'update', 'U', workload.stats.update) + _latency_optype(fh, 'truncate', 'T', workload.stats.truncate) + _latency_optype(fh, 'not found', 'N', workload.stats.not_found) diff --git a/bench/workgen/runner/small_btree.py b/bench/workgen/runner/small_btree.py new file mode 100644 index 00000000000..d70f0d9e693 --- /dev/null +++ b/bench/workgen/runner/small_btree.py @@ -0,0 +1,27 @@ +#!/usr/bin/python +from runner import * +from wiredtiger import * +from workgen import * + +context = Context() +conn = wiredtiger_open("WT_TEST", "create,cache_size=500MB") +s = conn.open_session() +tname = "file:test.wt" +s.create(tname, 'key_format=S,value_format=S') +table = Table(tname) +table.options.key_size = 20 +table.options.value_size = 100 + +op = Operation(Operation.OP_INSERT, table) +thread = Thread(op * 500000) +pop_workload = Workload(context, thread) +print('populate:') +pop_workload.run(conn) + +op = Operation(Operation.OP_SEARCH, table) +t = Thread(op) +workload = Workload(context, t * 8) 
+workload.options.run_time = 120 +workload.options.report_interval = 5 +print('read workload:') +workload.run(conn) diff --git a/bench/workgen/setup.py b/bench/workgen/setup.py new file mode 100644 index 00000000000..79d3fc4297c --- /dev/null +++ b/bench/workgen/setup.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +from __future__ import print_function +import re, os, sys +from distutils.core import setup, Extension + +# OS X hack: turn off the Universal binary support that is built into the +# Python build machinery, just build for the default CPU architecture. 
+if not 'ARCHFLAGS' in os.environ: + os.environ['ARCHFLAGS'] = '' + +# Suppress warnings building SWIG generated code +extra_cflags = [ '-Wmissing-field-initializers', '-Wextra', '-Wno-shadow', '-I../../src/include', '-I../../test/utility'] + +dir = os.path.dirname(__file__) +abs_dir = os.path.dirname(os.path.abspath(__file__)) + +if abs_dir.endswith(os.sep + os.path.join('bench', 'workgen')): + wt_dir = os.path.dirname(os.path.dirname(abs_dir)) +else: + print(os.path.basename(__file__) + ": running from unknown dir", file=sys.stderr) + sys.exit(1) + +build_dir = os.path.join(wt_dir, 'build_posix') + +# Read the version information from the RELEASE_INFO file +for l in open(os.path.join(dir, '..', '..', 'RELEASE_INFO')): + if re.match(r'WIREDTIGER_VERSION_(?:MAJOR|MINOR|PATCH)=', l): + exec(l) + +wt_ver = '%d.%d' % (WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR) + +setup(name='workgen', version=wt_ver, + ext_modules=[Extension('_workgen', + [os.path.join(dir, 'workgen_wrap.cxx')], + libraries=['wiredtiger', 'pthread'], + extra_objects = [ os.path.join(build_dir, 'bench', 'workgen', \ + '.libs', 'libworkgen.a') ], + extra_compile_args=extra_cflags, + )], + package_dir={'' : dir}, + packages=['workgen'], +) diff --git a/bench/workgen/workgen.cxx b/bench/workgen/workgen.cxx new file mode 100644 index 00000000000..c56acfd2989 --- /dev/null +++ b/bench/workgen/workgen.cxx @@ -0,0 +1,1605 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. 
We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define __STDC_LIMIT_MACROS // needed to get UINT64_MAX in C++ +#include +#include +#include +#include +#include "wiredtiger.h" +#include "workgen.h" +#include "workgen_int.h" +#include "workgen_time.h" +extern "C" { +// Include some specific WT files, as some files included by wt_internal.h +// have some C-ism's that don't work in C++. +#include +#include +#include +#include +#include +#include +#include +#include "error.h" +#include "misc.h" +} + +#define LATENCY_US_BUCKETS 1000 +#define LATENCY_MS_BUCKETS 1000 +#define LATENCY_SEC_BUCKETS 100 + +#define THROTTLE_PER_SEC 20 // times per sec we will throttle + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) < (b) ? (b) : (a)) +#define TIMESPEC_DOUBLE(ts) ((double)(ts).tv_sec + ts.tv_nsec * 0.000000001) +#define PCT(n, total) ((total) == 0 ? 0 : ((n) * 100) / (total)) +#define OPS_PER_SEC(ops, ts) (int) ((ts) == 0 ? 0.0 : \ + (ops) / TIMESPEC_DOUBLE(ts)) + +// Get the value of a STL container, even if it is not present +#define CONTAINER_VALUE(container, idx, dfault) \ + (((container).count(idx) > 0) ? 
(container)[idx] : (dfault)) + +#define CROSS_USAGE(a, b) \ + (((a & USAGE_READ) != 0 && (b & USAGE_WRITE) != 0) || \ + ((a & USAGE_WRITE) != 0 && (b & USAGE_READ) != 0)) + +#define ASSERT(cond) \ + do { \ + if (!(cond)) { \ + fprintf(stderr, "%s:%d: ASSERT failed: %s\n", \ + __FILE__, __LINE__, #cond); \ + abort(); \ + } \ + } while(0) + +#define THROW_ERRNO(e, args) \ + do { \ + std::stringstream __sstm; \ + __sstm << args; \ + WorkgenException __wge(e, __sstm.str().c_str()); \ + throw(__wge); \ + } while(0) + +#define THROW(args) THROW_ERRNO(0, args) + +#define VERBOSE(runner, args) \ + do { \ + if ((runner)._context->_verbose) \ + std::cout << args << std::endl; \ + } while(0) + +#define OP_HAS_VALUE(op) \ + ((op)->_optype == Operation::OP_INSERT || \ + (op)->_optype == Operation::OP_UPDATE) + +namespace workgen { + +// The number of contexts. Normally there is one context created, but it will +// be possible to use several eventually. More than one is not yet +// implemented, but we must at least guard against the caller creating more +// than one. +static uint32_t context_count = 0; + +static void *thread_runner_main(void *arg) { + ThreadRunner *runner = (ThreadRunner *)arg; + try { + runner->_errno = runner->run(); + } catch (WorkgenException &wge) { + runner->_exception = wge; + } + return (NULL); +} + +static void *monitor_main(void *arg) { + Monitor *monitor = (Monitor *)arg; + try { + monitor->_errno = monitor->run(); + } catch (WorkgenException &wge) { + monitor->_exception = wge; + } + return (NULL); +} + +// Exponentiate (like the pow function), except that it returns an exact +// integral 64 bit value, and if it overflows, returns the maximum possible +// value for the return type. 
+static uint64_t power64(int base, int exp) { + uint64_t last, result; + + result = 1; + for (int i = 0; i < exp; i++) { + last = result; + result *= base; + if (result < last) + return UINT64_MAX; + } + return result; +} + +OptionsList::OptionsList() : _option_map() {} +OptionsList::OptionsList(const OptionsList &other) : + _option_map(other._option_map) {} + +void OptionsList::add_option(const char *name, const std::string typestr, + const char *desc) { + TypeDescPair pair(typestr, desc); + _option_map[name] = pair; +} + +void OptionsList::add_int(const char *name, int default_value, + const char *desc) { + std::stringstream sstm; + sstm << "int, default=" << default_value; + add_option(name, sstm.str(), desc); +} + +void OptionsList::add_bool(const char *name, bool default_value, + const char *desc) { + std::stringstream sstm; + sstm << "boolean, default=" << (default_value ? "true" : "false"); + add_option(name, sstm.str(), desc); +} + +void OptionsList::add_double(const char *name, double default_value, + const char *desc) { + std::stringstream sstm; + sstm << "double, default=" << default_value; + add_option(name, sstm.str(), desc); +} + +void OptionsList::add_string(const char *name, + const std::string &default_value, const char *desc) { + std::stringstream sstm; + sstm << "string, default=\"" << default_value << "\""; + add_option(name, sstm.str(), desc); +} + +static void +pretty_print(const char *p, const char *indent, std::stringstream &sstm) +{ + const char *t; + + for (;; p = t + 1) { + if (strlen(p) <= 70) + break; + for (t = p + 70; t > p && *t != ' '; --t) + ; + if (t == p) /* No spaces? 
*/ + break; + if (indent != NULL) + sstm << indent; + std::string line(p, (size_t)(t - p)); + sstm << line << std::endl; + } + if (*p != '\0') { + if (indent != NULL) + sstm << indent; + sstm << p << std::endl; + } +} + +std::string OptionsList::help() const { + std::stringstream sstm; + for (std::map::const_iterator i = + _option_map.begin(); i != _option_map.end(); i++) { + sstm << i->first << " (" << i->second.first << ")" << std::endl; + pretty_print(i->second.second.c_str(), "\t", sstm); + } + return sstm.str(); +} + +std::string OptionsList::help_description(const char *option_name) const { + const std::string key(option_name); + if (_option_map.count(key) == 0) + return (std::string("")); + else + return (_option_map.find(key)->second.second); +} + +std::string OptionsList::help_type(const char *option_name) const { + const std::string key(option_name); + if (_option_map.count(key) == 0) + return std::string(""); + else + return (_option_map.find(key)->second.first); +} + +Context::Context() : _verbose(false), _internal(new ContextInternal()) {} +Context::~Context() { delete _internal; } +Context& Context::operator=(const Context &other) { + _verbose = other._verbose; + *_internal = *other._internal; + return (*this); +} + +ContextInternal::ContextInternal() : _tint(), _table_names(), + _recno(NULL), _recno_alloced(0), _tint_last(0), _context_count(0) { + uint32_t count; + if ((count = workgen_atomic_add32(&context_count, 1)) != 1) + THROW("multiple Contexts not supported"); + _context_count = count; +} + +ContextInternal::~ContextInternal() { + if (_recno != NULL) + delete _recno; +} + +int ContextInternal::create_all() { + if (_recno_alloced != _tint_last) { + // The array references are 1-based, we'll waste one entry. 
+ uint64_t *new_recno = new uint64_t[_tint_last + 1]; + memcpy(new_recno, _recno, sizeof(uint64_t) * _recno_alloced); + memset(&new_recno[_recno_alloced], 0, + sizeof(uint64_t) * (_tint_last - _recno_alloced + 1)); + delete _recno; + _recno = new_recno; + _recno_alloced = _tint_last; + } + return (0); +} + +Monitor::Monitor(WorkloadRunner &wrunner) : + _errno(0), _exception(), _wrunner(wrunner), _stop(false), _handle() {} +Monitor::~Monitor() {} + +int Monitor::run() { + struct timespec t; + struct tm *tm, _tm; + char time_buf[64]; + Stats prev_totals; + WorkloadOptions *options = &_wrunner._workload->options; + uint64_t latency_max = (uint64_t)options->max_latency; + + (*_out) << "#time," + << "totalsec," + << "read ops per second," + << "insert ops per second," + << "update ops per second," + << "checkpoints," + << "read average latency(uS)," + << "read minimum latency(uS)," + << "read maximum latency(uS)," + << "insert average latency(uS)," + << "insert min latency(uS)," + << "insert maximum latency(uS)," + << "update average latency(uS)," + << "update min latency(uS)," + << "update maximum latency(uS)" + << std::endl; + + Stats prev_interval; + while (!_stop) { + for (int i = 0; i < options->sample_interval && !_stop; i++) + sleep(1); + if (_stop) + break; + + workgen_epoch(&t); + tm = localtime_r(&t.tv_sec, &_tm); + (void)strftime(time_buf, sizeof(time_buf), "%b %d %H:%M:%S", tm); + + Stats new_totals(true); + for (std::vector::iterator tr = + _wrunner._trunners.begin(); tr != _wrunner._trunners.end(); tr++) + new_totals.add(tr->_stats, true); + Stats interval(new_totals); + interval.subtract(prev_totals); + interval.smooth(prev_interval); + + int interval_secs = options->sample_interval; + uint64_t cur_reads = interval.read.ops / interval_secs; + uint64_t cur_inserts = interval.insert.ops / interval_secs; + uint64_t cur_updates = interval.update.ops / interval_secs; + + uint64_t totalsec = ts_sec(t - _wrunner._start); + (*_out) << time_buf + << "," << 
totalsec + << "," << cur_reads + << "," << cur_inserts + << "," << cur_updates + << "," << 'N' // checkpoint in progress + << "," << interval.read.average_latency() + << "," << interval.read.min_latency + << "," << interval.read.max_latency + << "," << interval.insert.average_latency() + << "," << interval.insert.min_latency + << "," << interval.insert.max_latency + << "," << interval.update.average_latency() + << "," << interval.update.min_latency + << "," << interval.update.max_latency + << std::endl; + + uint64_t read_max = interval.read.max_latency; + uint64_t insert_max = interval.read.max_latency; + uint64_t update_max = interval.read.max_latency; + + if (latency_max != 0 && + (read_max > latency_max || insert_max > latency_max || + update_max > latency_max)) { + std::cerr << "WARNING: max latency exceeded:" + << " threshold " << latency_max + << " read max " << read_max + << " insert max " << insert_max + << " update max " << update_max << std::endl; + } + + prev_interval.assign(interval); + prev_totals.assign(new_totals); + } + return (0); +} + +ThreadRunner::ThreadRunner() : + _errno(0), _exception(), _thread(NULL), _context(NULL), _icontext(NULL), + _workload(NULL), _wrunner(NULL), _rand_state(NULL), + _throttle(NULL), _throttle_ops(0), _throttle_limit(0), + _in_transaction(false), _number(0), _stats(false), _table_usage(), + _cursors(NULL), _stop(false), _session(NULL), _keybuf(NULL), + _valuebuf(NULL), _repeat(false) { +} + +ThreadRunner::~ThreadRunner() { + free_all(); +} + +int ThreadRunner::create_all(WT_CONNECTION *conn) { + size_t keysize, valuesize; + + WT_RET(close_all()); + ASSERT(_session == NULL); + WT_RET(conn->open_session(conn, NULL, NULL, &_session)); + _table_usage.clear(); + _stats.track_latency(_workload->options.sample_interval > 0); + WT_RET(workgen_random_alloc(_session, &_rand_state)); + _throttle_ops = 0; + _throttle_limit = 0; + _in_transaction = 0; + keysize = 1; + valuesize = 1; + op_create_all(&_thread->_op, keysize, 
valuesize); + _keybuf = new char[keysize]; + _valuebuf = new char[valuesize]; + _keybuf[keysize - 1] = '\0'; + _valuebuf[valuesize - 1] = '\0'; + return (0); +} + +int ThreadRunner::open_all() { + typedef WT_CURSOR *WT_CURSOR_PTR; + if (_cursors != NULL) + delete _cursors; + _cursors = new WT_CURSOR_PTR[_icontext->_tint_last + 1]; + memset(_cursors, 0, sizeof (WT_CURSOR *) * (_icontext->_tint_last + 1)); + for (std::map::iterator i = _table_usage.begin(); + i != _table_usage.end(); i++) { + uint32_t tindex = i->first; + const char *uri = _icontext->_table_names[tindex].c_str(); + WT_RET(_session->open_cursor(_session, uri, NULL, NULL, + &_cursors[tindex])); + } + return (0); +} + +int ThreadRunner::close_all() { + if (_throttle != NULL) { + delete _throttle; + _throttle = NULL; + } + if (_session != NULL) { + WT_RET(_session->close(_session, NULL)); + _session = NULL; + } + free_all(); + return (0); +} + +void ThreadRunner::free_all() { + if (_rand_state != NULL) { + workgen_random_free(_rand_state); + _rand_state = NULL; + } + if (_cursors != NULL) { + delete _cursors; + _cursors = NULL; + } + if (_keybuf != NULL) { + delete _keybuf; + _keybuf = NULL; + } + if (_valuebuf != NULL) { + delete _valuebuf; + _valuebuf = NULL; + } +} + +int ThreadRunner::cross_check(std::vector &runners) { + std::map usage; + + // Determine which tables have cross usage + for (std::vector::iterator r = runners.begin(); + r != runners.end(); r++) { + for (std::map::iterator i = r->_table_usage.begin(); + i != r->_table_usage.end(); i++) { + uint32_t tindex = i->first; + uint32_t thisusage = i->second; + uint32_t curusage = CONTAINER_VALUE(usage, tindex, 0); + if (CROSS_USAGE(curusage, thisusage)) + curusage |= USAGE_MIXED; + usage[tindex] = curusage; + } + } + for (std::map::iterator i = usage.begin(); + i != usage.end(); i++) { + if ((i->second & USAGE_MIXED) != 0) { + for (std::vector::iterator r = runners.begin(); + r != runners.end(); r++) { + r->_table_usage[i->first] |= 
USAGE_MIXED; + } + } + } + return (0); +} + +int ThreadRunner::run() { + WT_DECL_RET; + ThreadOptions *options = &_thread->options; + std::string name = options->name; + + VERBOSE(*this, "thread " << name << " running"); + if (options->throttle != 0) { + _throttle = new Throttle(*this, options->throttle, + options->throttle_burst); + } + for (int cnt = 0; !_stop && (_repeat || cnt < 1) && ret == 0; cnt++) + WT_ERR(op_run(&_thread->_op)); + +err: +#ifdef _DEBUG + { + std::string messages = this->get_debug(); + if (!messages.empty()) + std::cerr << "DEBUG (thread " << name << "): " + << messages << std::endl; + } +#endif + if (ret != 0) + std::cerr << "thread " << name << " failed err=" << ret << std::endl; + VERBOSE(*this, "thread " << name << "finished"); + return (ret); +} + +void ThreadRunner::get_static_counts(Stats &stats) { + _thread->_op.get_static_counts(stats, 1); +} + +void ThreadRunner::op_create_all(Operation *op, size_t &keysize, + size_t &valuesize) { + tint_t tint; + + op->size_check(); + if (op->_optype != Operation::OP_NONE) { + op->kv_compute_max(true); + if (OP_HAS_VALUE(op)) + op->kv_compute_max(false); + op->kv_size_buffer(true, keysize); + op->kv_size_buffer(false, valuesize); + + // Note: to support multiple contexts we'd need a generation + // count whenever we execute. + if (op->_table._internal->_context_count != 0 && + op->_table._internal->_context_count != _icontext->_context_count) + THROW("multiple Contexts not supported"); + if ((tint = op->_table._internal->_tint) == 0) { + std::string uri = op->_table._uri; + + // We are single threaded in this function, so do not have + // to worry about locking. + if (_icontext->_tint.count(uri) == 0) { + // TODO: don't use atomic add, it's overkill. 
+ tint = workgen_atomic_add32(&_icontext->_tint_last, 1); + _icontext->_tint[uri] = tint; + _icontext->_table_names[tint] = uri; + } else + tint = _icontext->_tint[uri]; + op->_table._internal->_tint = tint; + } + uint32_t usage_flags = CONTAINER_VALUE(_table_usage, + op->_table._internal->_tint, 0); + if (op->_optype == Operation::OP_SEARCH) + usage_flags |= ThreadRunner::USAGE_READ; + else + usage_flags |= ThreadRunner::USAGE_WRITE; + _table_usage[op->_table._internal->_tint] = usage_flags; + } + if (op->_group != NULL) + for (std::vector::iterator i = op->_group->begin(); + i != op->_group->end(); i++) + op_create_all(&*i, keysize, valuesize); +} + +uint64_t ThreadRunner::op_get_key_recno(Operation *op, tint_t tint) { + uint64_t recno_count; + uint32_t rand; + + recno_count = _icontext->_recno[tint]; + if (recno_count == 0) + // The file has no entries, returning 0 forces a WT_NOTFOUND return. + return (0); + rand = workgen_random(_rand_state); + return (rand % recno_count + 1); // recnos are one-based. +} + +int ThreadRunner::op_run(Operation *op) { + Track *track; + tint_t tint = op->_table._internal->_tint; + WT_CURSOR *cursor = _cursors[tint]; + WT_DECL_RET; + uint64_t recno; + bool measure_latency; + + recno = 0; + track = NULL; + if (_throttle != NULL) { + if (_throttle_ops >= _throttle_limit && !_in_transaction) { + WT_ERR(_throttle->throttle(_throttle_ops, + &_throttle_limit)); + _throttle_ops = 0; + } + if (op->_optype != Operation::OP_NONE) + ++_throttle_ops; + } + + // A potential race: thread1 is inserting, and increments + // Context->_recno[] for fileX.wt. thread2 is doing one of + // remove/search/update and grabs the new value of Context->_recno[] + // for fileX.wt. thread2 randomly chooses the highest recno (which + // has not yet been inserted by thread1), and when it accesses + // the record will get WT_NOTFOUND. It should be somewhat rare + // (and most likely when the threads are first beginning). 
Any + // WT_NOTFOUND returns are allowed and get their own statistic bumped. + switch (op->_optype) { + case Operation::OP_INSERT: + track = &_stats.insert; + recno = workgen_atomic_add64(&_icontext->_recno[tint], 1); + break; + case Operation::OP_REMOVE: + track = &_stats.remove; + recno = op_get_key_recno(op, tint); + break; + case Operation::OP_SEARCH: + track = &_stats.read; + recno = op_get_key_recno(op, tint); + break; + case Operation::OP_UPDATE: + track = &_stats.update; + recno = op_get_key_recno(op, tint); + break; + case Operation::OP_NONE: + recno = 0; + break; + } + + measure_latency = track != NULL && track->ops != 0 && + track->track_latency() && + (track->ops % _workload->options.sample_rate == 0); + + timespec start; + if (measure_latency) + workgen_epoch(&start); + + if (op->_transaction != NULL) { + if (_in_transaction) + THROW("nested transactions not supported"); + _session->begin_transaction(_session, + op->_transaction->_begin_config.c_str()); + _in_transaction = true; + } + if (op->_optype != Operation::OP_NONE) { + op->kv_gen(true, recno, _keybuf); + cursor->set_key(cursor, _keybuf); + if (OP_HAS_VALUE(op)) { + op->kv_gen(false, recno, _valuebuf); + cursor->set_value(cursor, _valuebuf); + } + switch (op->_optype) { + case Operation::OP_INSERT: + WT_ERR(cursor->insert(cursor)); + break; + case Operation::OP_REMOVE: + WT_ERR_NOTFOUND_OK(cursor->remove(cursor)); + break; + case Operation::OP_SEARCH: + ret = cursor->search(cursor); + break; + case Operation::OP_UPDATE: + WT_ERR_NOTFOUND_OK(cursor->update(cursor)); + break; + default: + ASSERT(false); + } + if (ret != 0) { + track = &_stats.not_found; + ret = 0; // WT_NOTFOUND allowed. 
+ } + cursor->reset(cursor); + } + if (measure_latency) { + timespec stop; + workgen_epoch(&stop); + track->incr_with_latency(ts_us(stop - start)); + } else if (track != NULL) + track->incr(); + + if (op->_group != NULL) + for (int count = 0; !_stop && count < op->_repeatgroup; count++) + for (std::vector::iterator i = op->_group->begin(); + i != op->_group->end(); i++) + WT_ERR(op_run(&*i)); +err: + if (op->_transaction != NULL) { + if (ret != 0 || op->_transaction->_rollback) + WT_TRET(_session->rollback_transaction(_session, NULL)); + else + ret = _session->commit_transaction(_session, + op->_transaction->_commit_config.c_str()); + _in_transaction = false; + } + return (ret); +} + +#ifdef _DEBUG +std::string ThreadRunner::get_debug() { + return (_debug_messages.str()); +} +#endif + +Throttle::Throttle(ThreadRunner &runner, double throttle, + double throttle_burst) : _runner(runner), _throttle(throttle), + _burst(throttle_burst), _next_div(), _ops_delta(0), _ops_prev(0), + _ops_per_div(0), _ms_per_div(0), _started(false) { + ts_clear(_next_div); + _ms_per_div = ceill(1000.0 / THROTTLE_PER_SEC); + _ops_per_div = ceill(_throttle / THROTTLE_PER_SEC); +} + +Throttle::~Throttle() {} + +// Given a random 32-bit value, return a float value equally distributed +// between -1.0 and 1.0. +static float rand_signed(uint32_t r) { + int sign = ((r & 0x1) == 0 ? 1 : -1); + return (((float)r * sign) / UINT32_MAX); +} + +// Each time throttle is called, we sleep and return a number of operations to +// perform next. To implement this we keep a time calculation in _next_div set +// initially to the current time + 1/THROTTLE_PER_SEC. Each call to throttle +// advances _next_div by 1/THROTTLE_PER_SEC, and if _next_div is in the future, +// we sleep for the difference between the _next_div and the current_time. We +// always return (Thread.options.throttle / THROTTLE_PER_SEC) as the number of +// operations. 
+// +// The only variation is that the amount of individual sleeps is modified by a +// random amount (which varies more widely as Thread.options.throttle_burst is +// greater). This has the effect of randomizing how much clumping happens, and +// ensures that multiple threads aren't executing in lock step. +// +int Throttle::throttle(uint64_t op_count, uint64_t *op_limit) { + uint64_t ops; + int64_t sleep_ms; + timespec now; + + workgen_epoch(&now); + DEBUG_CAPTURE(_runner, "throttle: ops=" << op_count); + if (!_started) { + _next_div = ts_add_ms(now, _ms_per_div); + _started = true; + } else { + _ops_delta += (op_count - _ops_prev); + if (now < _next_div) { + sleep_ms = ts_ms(_next_div - now); + sleep_ms += (_ms_per_div * _burst * + rand_signed(workgen_random(_runner._rand_state))); + if (sleep_ms > 0) { + DEBUG_CAPTURE(_runner, ", sleep=" << sleep_ms); + usleep((useconds_t)ms_to_us(sleep_ms)); + } + } + _next_div = ts_add_ms(_next_div, _ms_per_div); + } + ops = _ops_per_div; + if (_ops_delta < (int64_t)ops) { + ops -= _ops_delta; + _ops_delta = 0; + } else { + _ops_delta -= ops; + ops = 0; + } + *op_limit = ops; + _ops_prev = ops; + DEBUG_CAPTURE(_runner, ", return=" << ops << std::endl); + return (0); +} + +ThreadOptions::ThreadOptions() : name(), throttle(0.0), throttle_burst(1.0), + _options() { + _options.add_string("name", name, "name of the thread"); + _options.add_double("throttle", throttle, + "Limit to this number of operations per second"); + _options.add_double("throttle_burst", throttle_burst, + "Changes characteristic of throttling from smooth (0.0) " + "to having large bursts with lulls (10.0 or larger)"); +} +ThreadOptions::ThreadOptions(const ThreadOptions &other) : + name(other.name), throttle(other.throttle), + throttle_burst(other.throttle_burst), _options(other._options) {} +ThreadOptions::~ThreadOptions() {} + +void +ThreadListWrapper::extend(const ThreadListWrapper &other) { + for (std::vector::const_iterator i = other._threads.begin(); + i 
!= other._threads.end(); i++) + _threads.push_back(*i); +} + +void +ThreadListWrapper::append(const Thread &t) { + _threads.push_back(t); +} + +void +ThreadListWrapper::multiply(const int n) { + if (n == 0) { + _threads.clear(); + } else { + // Snapshot the original list so each pass appends exactly one more copy of it. + std::vector<Thread> copy(_threads); + for (int cnt = 1; cnt < n; cnt++) + extend(copy); + } +} + +Thread::Thread() : options(), _op() { +} + +Thread::Thread(const Operation &op) : options(), _op(op) { +} + +Thread::Thread(const Thread &other) : options(other.options), _op(other._op) { +} + +Thread::~Thread() { +} + +void Thread::describe(std::ostream &os) const { + os << "Thread: [" << std::endl; + _op.describe(os); os << std::endl; + os << "]"; +} + +Operation::Operation() : + _optype(OP_NONE), _table(), _key(), _value(), _transaction(NULL), + _group(NULL), _repeatgroup(0), + _keysize(0), _valuesize(0), _keymax(0), _valuemax(0) { +} + +Operation::Operation(OpType optype, Table table, Key key, Value value) : + _optype(optype), _table(table), _key(key), _value(value), + _transaction(NULL), _group(NULL), _repeatgroup(0), + _keysize(0), _valuesize(0), _keymax(0), _valuemax(0) { + size_check(); +} + +Operation::Operation(OpType optype, Table table, Key key) : + _optype(optype), _table(table), _key(key), _value(), _transaction(NULL), + _group(NULL), _repeatgroup(0), + _keysize(0), _valuesize(0), _keymax(0), _valuemax(0) { + size_check(); +} + +Operation::Operation(OpType optype, Table table) : + _optype(optype), _table(table), _key(), _value(), _transaction(NULL), + _group(NULL), _repeatgroup(0), + _keysize(0), _valuesize(0), _keymax(0), _valuemax(0) { + size_check(); +} + +Operation::Operation(const Operation &other) : + _optype(other._optype), _table(other._table), _key(other._key), + _value(other._value), _transaction(other._transaction), + _group(other._group), _repeatgroup(other._repeatgroup), + _keysize(other._keysize), _valuesize(other._valuesize), + _keymax(other._keymax), _valuemax(other._valuemax) { + // Creation and destruction
of _group and _transaction is managed + // by Python. +} + +Operation::~Operation() { + // Creation and destruction of _group, _transaction is managed by Python. +} + +Operation& Operation::operator=(const Operation &other) { + _optype = other._optype; + _table = other._table; + _key = other._key; + _value = other._value; + _transaction = other._transaction; + _group = other._group; + _repeatgroup = other._repeatgroup; + _keysize = other._keysize; + _valuesize = other._valuesize; + _keymax = other._keymax; + _valuemax = other._valuemax; + return (*this); +} + +// Write a one-line description of this operation, its transaction and any nested group. +void Operation::describe(std::ostream &os) const { + os << "Operation: " << _optype; + if (_optype != OP_NONE) { + os << ", "; _table.describe(os); + os << ", "; _key.describe(os); + os << ", "; _value.describe(os); + } + if (_transaction != NULL) { + os << ", ["; _transaction->describe(os); os << "]"; + } + if (_group != NULL) { + os << ", group[" << _repeatgroup << "]: {"; + bool first = true; + for (std::vector<Operation>::const_iterator i = _group->begin(); + i != _group->end(); i++) { + if (!first) + os << "}, {"; + i->describe(os); + first = false; + } + os << "}"; + } +} + +// Add this operation's count, and recursively its group's counts scaled by _repeatgroup, to stats. +void Operation::get_static_counts(Stats &stats, int multiplier) { + switch (_optype) { + case OP_NONE: + break; + case OP_INSERT: + stats.insert.ops += multiplier; + break; + case OP_REMOVE: + stats.remove.ops += multiplier; + break; + case OP_SEARCH: + stats.read.ops += multiplier; + break; + case OP_UPDATE: + stats.update.ops += multiplier; + break; + default: + ASSERT(false); + } + if (_group != NULL) + for (std::vector<Operation>::iterator i = _group->begin(); + i != _group->end(); i++) + i->get_static_counts(stats, multiplier * _repeatgroup); +} + +void Operation::kv_compute_max(bool iskey) { + uint64_t max; + int size; + + size = iskey ? _key._size : _value._size; + if (size == 0) + size = iskey ?
_table.options.key_size : _table.options.value_size; + + if (iskey && size < 2) + THROW("Key.size too small for table '" << _table._uri << "'"); + if (!iskey && size < 1) + THROW("Value.size too small for table '" << _table._uri << "'"); + + if (size > 1) + max = power64(10, (size - 1)) - 1; + else + max = 0; + + if (iskey) { + _keysize = size; + _keymax = max; + } else { + _valuesize = size; + _valuemax = max; + } +} + +void Operation::kv_size_buffer(bool iskey, size_t &maxsize) const { + if (iskey) { + if ((size_t)_keysize > maxsize) + maxsize = _keysize; + } else { + if ((size_t)_valuesize > maxsize) + maxsize = _valuesize; + } +} + +void Operation::kv_gen(bool iskey, uint64_t n, char *result) const { + uint64_t max; + int size; + + size = iskey ? _keysize : _valuesize; + max = iskey ? _keymax : _valuemax; + if (n > max) + THROW((iskey ? "Key" : "Value") << " (" << n + << ") too large for size (" << size << ")"); + workgen_u64_to_string_zf(n, result, size); +} + +void Operation::size_check() const { + if (_optype != OP_NONE && _key._size == 0 && _table.options.key_size == 0) + THROW("operation requires a key size"); + if (OP_HAS_VALUE(this) && _value._size == 0 && + _table.options.value_size == 0) + THROW("operation requires a value size"); +} + +Track::Track(bool latency_tracking) : ops(0), latency_ops(0), latency(0), + min_latency(0), max_latency(0), us(NULL), ms(NULL), sec(NULL) { + track_latency(latency_tracking); +} + +Track::Track(const Track &other) : ops(other.ops), + latency_ops(other.latency_ops), latency(other.latency), + min_latency(other.min_latency), max_latency(other.max_latency), + us(NULL), ms(NULL), sec(NULL) { + if (other.us != NULL) { + us = new uint32_t[LATENCY_US_BUCKETS]; + ms = new uint32_t[LATENCY_MS_BUCKETS]; + sec = new uint32_t[LATENCY_SEC_BUCKETS]; + memcpy(us, other.us, sizeof(uint32_t) * LATENCY_US_BUCKETS); + memcpy(ms, other.ms, sizeof(uint32_t) * LATENCY_MS_BUCKETS); + memcpy(sec, other.sec, sizeof(uint32_t) * 
LATENCY_SEC_BUCKETS); + } +} + +Track::~Track() { + // The bucket arrays come from new[], so they must be released with delete[]. + if (us != NULL) { + delete[] us; + delete[] ms; + delete[] sec; + } +} + +void Track::add(Track &other, bool reset) { + ops += other.ops; + latency_ops += other.latency_ops; + latency += other.latency; + + min_latency = MIN(min_latency, other.min_latency); + if (reset) + other.min_latency = 0; + max_latency = MAX(max_latency, other.max_latency); + if (reset) + other.max_latency = 0; + + if (us != NULL && other.us != NULL) { + for (int i = 0; i < LATENCY_US_BUCKETS; i++) + us[i] += other.us[i]; + for (int i = 0; i < LATENCY_MS_BUCKETS; i++) + ms[i] += other.ms[i]; + for (int i = 0; i < LATENCY_SEC_BUCKETS; i++) + sec[i] += other.sec[i]; + } +} + +// Copy the counters from other, allocating or releasing the latency buckets to match it. +void Track::assign(const Track &other) { + ops = other.ops; + latency_ops = other.latency_ops; + latency = other.latency; + min_latency = other.min_latency; + max_latency = other.max_latency; + + if (other.us == NULL && us != NULL) { + delete[] us; + delete[] ms; + delete[] sec; + us = NULL; + ms = NULL; + sec = NULL; + } + else if (other.us != NULL && us == NULL) { + us = new uint32_t[LATENCY_US_BUCKETS]; + ms = new uint32_t[LATENCY_MS_BUCKETS]; + sec = new uint32_t[LATENCY_SEC_BUCKETS]; + } + if (us != NULL) { + memcpy(us, other.us, sizeof(uint32_t) * LATENCY_US_BUCKETS); + memcpy(ms, other.ms, sizeof(uint32_t) * LATENCY_MS_BUCKETS); + memcpy(sec, other.sec, sizeof(uint32_t) * LATENCY_SEC_BUCKETS); + } +} + +uint64_t Track::average_latency() const { + if (latency_ops == 0) + return (0); + else + return (latency / latency_ops); +} + +void Track::clear() { + ops = 0; + latency_ops = 0; + latency = 0; + min_latency = 0; + max_latency = 0; + if (us != NULL) { + memset(us, 0, sizeof(uint32_t) * LATENCY_US_BUCKETS); + memset(ms, 0, sizeof(uint32_t) * LATENCY_MS_BUCKETS); + memset(sec, 0, sizeof(uint32_t) * LATENCY_SEC_BUCKETS); + } +} + +void Track::incr() { + ops++; +} + +void Track::incr_with_latency(uint64_t usecs) { + ASSERT(us != NULL); + + ops++; + latency_ops++; + latency += usecs;
+ if (usecs > max_latency) + max_latency = (uint32_t)usecs; + if (usecs < min_latency) + min_latency = (uint32_t)usecs; + + // Update a latency bucket. + // First buckets: one per microsecond, for latencies below LATENCY_US_BUCKETS usecs. + if (usecs < LATENCY_US_BUCKETS) + us[usecs]++; + + // Second buckets: milliseconds from 1ms to 1000ms, at 1ms each. + else if (usecs < ms_to_us(LATENCY_MS_BUCKETS)) + ms[us_to_ms(usecs)]++; + + // Third buckets are seconds from 1s to 100s, at 1s each. + else if (usecs < sec_to_us(LATENCY_SEC_BUCKETS)) + sec[us_to_sec(usecs)]++; + + // >100 seconds, accumulate in the biggest bucket. + else + sec[LATENCY_SEC_BUCKETS - 1]++; +} + +void Track::subtract(const Track &other) { + ops -= other.ops; + latency_ops -= other.latency_ops; + latency -= other.latency; + + // There's no sensible thing to be done for min/max_latency. + + if (us != NULL && other.us != NULL) { + for (int i = 0; i < LATENCY_US_BUCKETS; i++) + us[i] -= other.us[i]; + for (int i = 0; i < LATENCY_MS_BUCKETS; i++) + ms[i] -= other.ms[i]; + for (int i = 0; i < LATENCY_SEC_BUCKETS; i++) + sec[i] -= other.sec[i]; + } +} + +// If there are no entries in this Track, take them from +// a previous Track. Used to smooth graphs. We don't worry +// about latency buckets here.
+void Track::smooth(const Track &other) { + if (latency_ops == 0) { + ops = other.ops; + latency = other.latency; + latency_ops = other.latency_ops; + min_latency = other.min_latency; + max_latency = other.max_latency; + } +} + +// Turn latency tracking on (allocate zeroed buckets) or off (release them). +void Track::track_latency(bool newval) { + if (newval) { + if (us == NULL) { + us = new uint32_t[LATENCY_US_BUCKETS]; + ms = new uint32_t[LATENCY_MS_BUCKETS]; + sec = new uint32_t[LATENCY_SEC_BUCKETS]; + memset(us, 0, sizeof(uint32_t) * LATENCY_US_BUCKETS); + memset(ms, 0, sizeof(uint32_t) * LATENCY_MS_BUCKETS); + memset(sec, 0, sizeof(uint32_t) * LATENCY_SEC_BUCKETS); + } + } else { + if (us != NULL) { + // Allocated with new[], so release with delete[]. + delete[] us; + delete[] ms; + delete[] sec; + us = NULL; + ms = NULL; + sec = NULL; + } + } +} + +void Track::_get_us(long *result) { + if (us != NULL) { + for (int i = 0; i < LATENCY_US_BUCKETS; i++) + result[i] = (long)us[i]; + } else + memset(result, 0, sizeof(long) * LATENCY_US_BUCKETS); +} +void Track::_get_ms(long *result) { + if (ms != NULL) { + for (int i = 0; i < LATENCY_MS_BUCKETS; i++) + result[i] = (long)ms[i]; + } else + memset(result, 0, sizeof(long) * LATENCY_MS_BUCKETS); +} +void Track::_get_sec(long *result) { + if (sec != NULL) { + for (int i = 0; i < LATENCY_SEC_BUCKETS; i++) + result[i] = (long)sec[i]; + } else + memset(result, 0, sizeof(long) * LATENCY_SEC_BUCKETS); +} + +Stats::Stats(bool latency) : insert(latency), not_found(latency), + read(latency), remove(latency), update(latency), truncate(latency) { +} + +Stats::Stats(const Stats &other) : insert(other.insert), + not_found(other.not_found), read(other.read), remove(other.remove), + update(other.update), truncate(other.truncate) { +} + +Stats::~Stats() {} + +void Stats::add(Stats &other, bool reset) { + insert.add(other.insert, reset); + not_found.add(other.not_found, reset); + read.add(other.read, reset); + remove.add(other.remove, reset); + update.add(other.update, reset); + truncate.add(other.truncate, reset); +} + +void Stats::assign(const Stats &other) { +
insert.assign(other.insert); + not_found.assign(other.not_found); + read.assign(other.read); + remove.assign(other.remove); + update.assign(other.update); + truncate.assign(other.truncate); +} + +void Stats::clear() { + insert.clear(); + not_found.clear(); + read.clear(); + remove.clear(); + update.clear(); + truncate.clear(); +} + +void Stats::describe(std::ostream &os) const { + os << "Stats: reads " << read.ops; + if (not_found.ops > 0) { + os << " (" << not_found.ops << " not found)"; + } + os << ", inserts " << insert.ops; + os << ", updates " << update.ops; + os << ", truncates " << truncate.ops; + os << ", removes " << remove.ops; +} + +void Stats::final_report(std::ostream &os, timespec &totalsecs) const { + uint64_t ops = 0; + ops += read.ops; + ops += not_found.ops; + ops += insert.ops; + ops += update.ops; + ops += truncate.ops; + ops += remove.ops; + +#define FINAL_OUTPUT(os, field, singular, ops, totalsecs) \ + os << "Executed " << field << " " #singular " operations (" \ + << PCT(field, ops) << "%) " << OPS_PER_SEC(field, totalsecs) \ + << " ops/sec" << std::endl + + FINAL_OUTPUT(os, read.ops, read, ops, totalsecs); + FINAL_OUTPUT(os, not_found.ops, not found, ops, totalsecs); + FINAL_OUTPUT(os, insert.ops, insert, ops, totalsecs); + FINAL_OUTPUT(os, update.ops, update, ops, totalsecs); + FINAL_OUTPUT(os, truncate.ops, truncate, ops, totalsecs); + FINAL_OUTPUT(os, remove.ops, remove, ops, totalsecs); +} + +void Stats::report(std::ostream &os) const { + os << read.ops << " reads"; + if (not_found.ops > 0) { + os << " (" << not_found.ops << " not found)"; + } + os << ", " << insert.ops << " inserts, "; + os << update.ops << " updates, "; + os << truncate.ops << " truncates, "; + os << remove.ops << " removes"; +} + +void Stats::smooth(const Stats &other) { + insert.smooth(other.insert); + not_found.smooth(other.not_found); + read.smooth(other.read); + remove.smooth(other.remove); + update.smooth(other.update); + truncate.smooth(other.truncate); +} + 
+void Stats::subtract(const Stats &other) { + insert.subtract(other.insert); + not_found.subtract(other.not_found); + read.subtract(other.read); + remove.subtract(other.remove); + update.subtract(other.update); + truncate.subtract(other.truncate); +} + +void Stats::track_latency(bool latency) { + insert.track_latency(latency); + not_found.track_latency(latency); + read.track_latency(latency); + remove.track_latency(latency); + update.track_latency(latency); + truncate.track_latency(latency); +} + +TableOptions::TableOptions() : key_size(0), value_size(0), _options() { + _options.add_int("key_size", key_size, + "default size of the key, unless overridden by Key.size"); + _options.add_int("value_size", value_size, + "default size of the value, unless overridden by Value.size"); +} +TableOptions::TableOptions(const TableOptions &other) : + key_size(other.key_size), value_size(other.value_size), + _options(other._options) {} +TableOptions::~TableOptions() {} + +Table::Table() : options(), _uri(), _internal(new TableInternal()) { +} +Table::Table(const char *uri) : options(), _uri(uri), + _internal(new TableInternal()) { +} +Table::Table(const Table &other) : options(other.options), _uri(other._uri), + _internal(new TableInternal(*other._internal)) { +} +Table::~Table() { delete _internal; } +Table& Table::operator=(const Table &other) { + options = other.options; + _uri = other._uri; + *_internal = *other._internal; + return (*this); +} + +void Table::describe(std::ostream &os) const { + os << "Table: " << _uri; +} + +TableInternal::TableInternal() : _tint(0), _context_count(0) {} +TableInternal::TableInternal(const TableInternal &other) : _tint(other._tint), + _context_count(other._context_count) {} +TableInternal::~TableInternal() {} + +WorkloadOptions::WorkloadOptions() : max_latency(0), + report_file("workload.stat"), report_interval(0), + run_time(0), sample_interval(0), sample_rate(1), + _options() { + _options.add_int("max_latency", max_latency, + "prints 
warning if any latency measured exceeds this number of " + "milliseconds. Requires sample_interval to be configured."); + _options.add_int("report_interval", report_interval, + "output throughput information every interval seconds, 0 to disable"); + _options.add_string("report_file", report_file, + "file name for collecting run output, " + "including output from the report_interval option. " + "The file name is relative to the connection's home directory. " + "When set to the empty string, stdout is used."); + _options.add_int("run_time", run_time, "total workload seconds"); + _options.add_int("sample_interval", sample_interval, + "performance logging every interval seconds, 0 to disable"); + _options.add_int("sample_rate", sample_rate, + "how often the latency of operations is measured. 1 for every operation, " + "2 for every second operation, 3 for every third operation etc."); +} + +// Copy every option, including report_file, so a copied workload reports to the same file. +WorkloadOptions::WorkloadOptions(const WorkloadOptions &other) : + max_latency(other.max_latency), report_file(other.report_file), + report_interval(other.report_interval), run_time(other.run_time), + sample_interval(other.sample_interval), sample_rate(other.sample_rate), + _options(other._options) {} +WorkloadOptions::~WorkloadOptions() {} + +Workload::Workload(Context *context, const ThreadListWrapper &tlw) : + options(), stats(), _context(context), _threads(tlw._threads) { + if (context == NULL) + THROW("Workload contructor requires a Context"); +} + +Workload::Workload(Context *context, const Thread &thread) : + options(), stats(), _context(context), _threads() { + if (context == NULL) + THROW("Workload contructor requires a Context"); + _threads.push_back(thread); +} + +Workload::Workload(const Workload &other) : + options(other.options), stats(other.stats), _context(other._context), + _threads(other._threads) {} +Workload::~Workload() {} + +Workload& Workload::operator=(const Workload &other) { + options = other.options; + stats.assign(other.stats); + *_context = *other._context; + _threads = other._threads; +
return (*this); +} + +int Workload::run(WT_CONNECTION *conn) { + WorkloadRunner runner(this); + + return (runner.run(conn)); +} + +WorkloadRunner::WorkloadRunner(Workload *workload) : + _workload(workload), _trunners(workload->_threads.size()), + _report_out(&std::cout), _start() { + ts_clear(_start); +} +WorkloadRunner::~WorkloadRunner() {} + +int WorkloadRunner::run(WT_CONNECTION *conn) { + WT_DECL_RET; + WorkloadOptions *options = &_workload->options; + std::ofstream report_out; + + _wt_home = conn->get_home(conn); + if (options->sample_interval > 0 && options->sample_rate <= 0) + THROW("Workload.options.sample_rate must be positive"); + if (!options->report_file.empty()) { + open_report_file(report_out, options->report_file.c_str(), + "Workload.options.report_file"); + _report_out = &report_out; + } + WT_ERR(create_all(conn, _workload->_context)); + WT_ERR(open_all()); + WT_ERR(ThreadRunner::cross_check(_trunners)); + WT_ERR(run_all()); + err: + //TODO: (void)close_all(); + _report_out = &std::cout; + return (ret); +} + +int WorkloadRunner::open_all() { + for (size_t i = 0; i < _trunners.size(); i++) { + WT_RET(_trunners[i].open_all()); + } + return (0); +} + +void WorkloadRunner::open_report_file(std::ofstream &of, const char *filename, + const char *desc) { + std::stringstream sstm; + + if (!_wt_home.empty()) + sstm << _wt_home << "/"; + sstm << filename; + of.open(sstm.str().c_str(), std::fstream::app); + if (!of) + THROW_ERRNO(errno, desc << ": \"" << sstm.str() + << "\" could not be opened"); +} + +int WorkloadRunner::create_all(WT_CONNECTION *conn, Context *context) { + for (size_t i = 0; i < _trunners.size(); i++) { + ThreadRunner *runner = &_trunners[i]; + std::stringstream sstm; + Thread *thread = &_workload->_threads[i]; + if (thread->options.name.empty()) { + sstm << "thread" << i; + thread->options.name = sstm.str(); + } + runner->_thread = thread; + runner->_context = context; + runner->_icontext = context->_internal; + runner->_workload = 
_workload; + runner->_wrunner = this; + runner->_number = (uint32_t)i; + // TODO: recover from partial failure here + WT_RET(runner->create_all(conn)); + } + WT_RET(context->_internal->create_all()); + return (0); +} + +int WorkloadRunner::close_all() { + for (size_t i = 0; i < _trunners.size(); i++) + _trunners[i].close_all(); + + return (0); +} + +void WorkloadRunner::get_stats(Stats *result) { + for (size_t i = 0; i < _trunners.size(); i++) + result->add(_trunners[i]._stats); +} + +void WorkloadRunner::report(time_t interval, time_t totalsecs, + Stats *prev_totals) { + std::ostream &out = *_report_out; + Stats new_totals(prev_totals->track_latency()); + + get_stats(&new_totals); + Stats diff(new_totals); + diff.subtract(*prev_totals); + prev_totals->assign(new_totals); + diff.report(out); + out << " in " << interval << " secs (" + << totalsecs << " total secs)" << std::endl; +} + +void WorkloadRunner::final_report(timespec &totalsecs) { + std::ostream &out = *_report_out; + Stats *stats = &_workload->stats; + + stats->clear(); + stats->track_latency(_workload->options.sample_interval > 0); + + get_stats(stats); + stats->final_report(out, totalsecs); + out << "Run completed: " << totalsecs << " seconds" << std::endl; +} + +// Start the monitor and worker threads, report periodically until run_time elapses, +// then stop and join every thread and issue the final report. +int WorkloadRunner::run_all() { + void *status; + std::vector<pthread_t> thread_handles; + Stats counts(false); + WorkgenException *exception; + WorkloadOptions *options = &_workload->options; + Monitor monitor(*this); + std::ofstream monitor_out; + std::ostream &out = *_report_out; + WT_DECL_RET; + + for (size_t i = 0; i < _trunners.size(); i++) + _trunners[i].get_static_counts(counts); + out << "Starting workload: " << _trunners.size() << " threads, "; + counts.report(out); + out << std::endl; + + workgen_epoch(&_start); + timespec end = _start + options->run_time; + timespec next_report = _start + options->report_interval; + + // Start all threads + if (options->sample_interval > 0) { + open_report_file(monitor_out, "monitor", "monitor output file"); +
monitor._out = &monitor_out; + + if ((ret = pthread_create(&monitor._handle, NULL, monitor_main, + &monitor)) != 0) { + std::cerr << "monitor thread failed err=" << ret << std::endl; + return (ret); + } + } + + for (size_t i = 0; i < _trunners.size(); i++) { + pthread_t thandle; + ThreadRunner *runner = &_trunners[i]; + runner->_stop = false; + runner->_repeat = (options->run_time != 0); + if ((ret = pthread_create(&thandle, NULL, thread_runner_main, + runner)) != 0) { + std::cerr << "pthread_create failed err=" << ret << std::endl; + std::cerr << "Stopping all threads." << std::endl; + for (size_t j = 0; j < thread_handles.size(); j++) { + _trunners[j]._stop = true; + (void)pthread_join(thread_handles[j], &status); + _trunners[j].close_all(); + } + return (ret); + } + thread_handles.push_back(thandle); + runner->_stats.clear(); + } + + // Let the test run, reporting as needed. + Stats curstats(false); + timespec now = _start; + while (now < end) { + timespec sleep_amt; + + sleep_amt = end - now; + // Wake early for the next periodic report, but never sleep past the end of the run. + if (options->report_interval != 0) { + timespec next_diff = next_report - now; + if (next_diff < sleep_amt) + sleep_amt = next_diff; + } + if (sleep_amt.tv_sec > 0) + sleep((unsigned int)sleep_amt.tv_sec); + else + usleep((useconds_t)((sleep_amt.tv_nsec + 999)/ 1000)); + + workgen_epoch(&now); + if (now >= next_report && now < end && options->report_interval != 0) { + report(options->report_interval, (now - _start).tv_sec, &curstats); + while (now >= next_report) + next_report += options->report_interval; + } + } + + // signal all threads to stop + if (options->run_time != 0) + for (size_t i = 0; i < _trunners.size(); i++) + _trunners[i]._stop = true; + if (options->sample_interval > 0) + monitor._stop = true; + + // wait for all threads + exception = NULL; + for (size_t i = 0; i < _trunners.size(); i++) { + WT_TRET(pthread_join(thread_handles[i], &status)); + if (_trunners[i]._errno != 0) + VERBOSE(_trunners[i], + "Thread " << i << " has errno " << _trunners[i]._errno); +
WT_TRET(_trunners[i]._errno); + _trunners[i].close_all(); + if (exception == NULL && !_trunners[i]._exception._str.empty()) + exception = &_trunners[i]._exception; + } + if (options->sample_interval > 0) { + WT_TRET(pthread_join(monitor._handle, &status)); + if (monitor._errno != 0) + std::cerr << "Monitor thread has errno " << monitor._errno + << std::endl; + if (exception == NULL && !monitor._exception._str.empty()) + exception = &monitor._exception; + } + + // issue the final report + timespec finalsecs = now - _start; + final_report(finalsecs); + + if (ret != 0) + std::cerr << "run_all failed err=" << ret << std::endl; + (*_report_out) << std::endl; + if (exception != NULL) + throw *exception; + return (ret); +} + +}; diff --git a/bench/workgen/workgen.h b/bench/workgen/workgen.h new file mode 100644 index 00000000000..c1ae01ed5a4 --- /dev/null +++ b/bench/workgen/workgen.h @@ -0,0 +1,410 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include <ostream> +#include <string> +#include <vector> +#include <map> + +namespace workgen { + +struct ContextInternal; +struct TableInternal; +struct Thread; +struct Transaction; + +#ifndef SWIG +struct OptionsList { + OptionsList(); + OptionsList(const OptionsList &other); + + void add_int(const char *name, int default_value, const char *desc); + void add_bool(const char *name, bool default_value, const char *desc); + void add_double(const char *name, double default_value, const char *desc); + void add_string(const char *name, const std::string &default_value, + const char *desc); + + std::string help() const; + std::string help_description(const char *option_name) const; + std::string help_type(const char *option_name) const; + +private: + void add_option(const char *name, const std::string typestr, + const char *desc); + typedef std::pair<std::string, std::string> TypeDescPair; + std::map<std::string, TypeDescPair> _option_map; +}; +#endif + +// These classes are all exposed to Python via SWIG. While they may contain +// data that is private to C++, such data must not prevent the objects from +// being shared. Tables, Keys, Values, Operations and Threads can be shared: a +// single Key object might appear in many operations; Operations may appear +// multiple times in a Thread or in different Threads; the same Thread may +// appear multiple times in a Workload list, etc. +// +// Certain kinds of state are allowed: A Table contains a unique pointer that +// is used within the internal part of the Context. Stats contain lots +// of state, but is made available after a Workload.run(). +// +// Python controls the lifetime of (nearly) all objects of these classes. +// The exception is Stat/Track objects, which are also created/used +// internally to calculate and show statistics during a run.
+// +struct Track { + // Threads maintain the total thread operations and total latency they've + // experienced. + + uint64_t ops; // Total operations + uint64_t latency_ops; // Total ops sampled for latency + uint64_t latency; // Total latency + + // Minimum/maximum latency, shared with the monitor thread, that is, the + // monitor thread clears it so it's recalculated again for each period. + + uint32_t min_latency; // Minimum latency (uS) + uint32_t max_latency; // Maximum latency (uS) + + Track(bool latency_tracking = false); + Track(const Track &other); + ~Track(); + + void add(Track&, bool reset = false); + void assign(const Track&); + uint64_t average_latency() const; + void clear(); + void incr(); + void incr_with_latency(uint64_t usecs); + void smooth(const Track&); + void subtract(const Track&); + void track_latency(bool); + bool track_latency() const { return (us != NULL); } + + void _get_us(long *); + void _get_ms(long *); + void _get_sec(long *); + +private: + // Latency buckets. From python, accessed via methods us(), ms(), sec() + uint32_t *us; // < 1us ... 1000us + uint32_t *ms; // < 1ms ... 1000ms + uint32_t *sec; // < 1s 2s ...
100s + + Track & operator=(const Track &other); // use explicit assign method +}; + +struct Stats { + Track insert; + Track not_found; + Track read; + Track remove; + Track update; + Track truncate; + + Stats(bool latency = false); + Stats(const Stats &other); + ~Stats(); + + void add(Stats&, bool reset = false); + void assign(const Stats&); + void clear(); + void describe(std::ostream &os) const; +#ifndef SWIG + void final_report(std::ostream &os, timespec &totalsecs) const; + void report(std::ostream &os) const; +#endif + void smooth(const Stats&); + void subtract(const Stats&); + void track_latency(bool); + bool track_latency() const { return (insert.track_latency()); } + +private: + Stats & operator=(const Stats &other); // use explicit assign method +}; + +// A Context tracks the current record number for each uri, used +// for key generation. +// +struct Context { + bool _verbose; + ContextInternal *_internal; + + Context(); + ~Context(); + void describe(std::ostream &os) const { + os << "Context: verbose " << (_verbose ? "true" : "false"); + } + +#ifndef SWIG + Context& operator=(const Context &other); +#endif +}; + +// To prevent silent errors, this class is set up in Python so that new +// properties are prevented, only existing properties can be set. 
+// +struct TableOptions { + int key_size; + int value_size; + + TableOptions(); + TableOptions(const TableOptions &other); + ~TableOptions(); + + void describe(std::ostream &os) const { + os << "key_size " << key_size; + os << ", value_size " << value_size; + } + + std::string help() const { return _options.help(); } + std::string help_description(const char *option_name) const { + return _options.help_description(option_name); } + std::string help_type(const char *option_name) const { + return _options.help_type(option_name); } + +private: + OptionsList _options; +}; + +struct Table { + TableOptions options; + std::string _uri; + TableInternal *_internal; + + /* XXX select table from range */ + + Table(); + Table(const char *tablename); + Table(const Table &other); + ~Table(); + + void describe(std::ostream &os) const; + +#ifndef SWIG + Table& operator=(const Table &other); +#endif +}; + +struct Key { + typedef enum { + KEYGEN_AUTO, KEYGEN_APPEND, KEYGEN_PARETO, KEYGEN_UNIFORM } KeyType; + KeyType _keytype; + int _size; + + /* XXX specify more about key distribution */ + Key() : _keytype(KEYGEN_AUTO), _size(0) {} + Key(KeyType keytype, int size) : _keytype(keytype), _size(size) {} + Key(const Key &other) : _keytype(other._keytype), _size(other._size) {} + ~Key() {} + + void describe(std::ostream &os) const { + os << "Key: type " << _keytype << ", size " << _size; } +}; + +struct Value { + int _size; + + /* XXX specify how value is calculated */ + Value() : _size(0) {} + Value(int size) : _size(size) {} + Value(const Value &other) : _size(other._size) {} + ~Value() {} + + void describe(std::ostream &os) const { os << "Value: size " << _size; } +}; + +struct Operation { + enum OpType { + OP_NONE, OP_INSERT, OP_REMOVE, OP_SEARCH, OP_UPDATE }; + OpType _optype; + + Table _table; + Key _key; + Value _value; + Transaction *_transaction; + std::vector<Operation> *_group; + int _repeatgroup; + +#ifndef SWIG + int _keysize; // derived from Key._size and Table.options.key_size + int
_valuesize; + uint64_t _keymax; + uint64_t _valuemax; +#endif + + Operation(); + Operation(OpType optype, Table table, Key key, Value value); + Operation(OpType optype, Table table, Key key); + Operation(OpType optype, Table table); + Operation(const Operation &other); + ~Operation(); + + void describe(std::ostream &os) const; +#ifndef SWIG + Operation& operator=(const Operation &other); + void get_static_counts(Stats &stats, int multiplier); + void kv_compute_max(bool); + void kv_gen(bool, uint64_t, char *) const; + void kv_size_buffer(bool iskey, size_t &size) const; + void size_check() const; +#endif +}; + +// To prevent silent errors, this class is set up in Python so that new +// properties are prevented, only existing properties can be set. +// +struct ThreadOptions { + std::string name; + double throttle; + double throttle_burst; + + ThreadOptions(); + ThreadOptions(const ThreadOptions &other); + ~ThreadOptions(); + + void describe(std::ostream &os) const { + os << "throttle " << throttle; + } + + std::string help() const { return _options.help(); } + std::string help_description(const char *option_name) const { + return _options.help_description(option_name); } + std::string help_type(const char *option_name) const { + return _options.help_type(option_name); } + +private: + OptionsList _options; +}; + +// This is a list of threads, which may be used in the Workload constructor. +// It participates with ThreadList defined on the SWIG/Python side and +// some Python operators added to Thread to allow Threads to be easily +// composed using '+' and multiplied (by integer counts) using '*'. +// Users of the workgen API in Python don't ever need to use +// ThreadListWrapper or ThreadList. 
+struct ThreadListWrapper { + std::vector _threads; + + ThreadListWrapper() : _threads() {} + ThreadListWrapper(const ThreadListWrapper &other) : + _threads(other._threads) {} + ThreadListWrapper(const std::vector &threads) : _threads(threads) {} + void extend(const ThreadListWrapper &); + void append(const Thread &); + void multiply(const int); +}; + +struct Thread { + ThreadOptions options; + Operation _op; + + Thread(); + Thread(const Operation &op); + Thread(const Thread &other); + ~Thread(); + + void describe(std::ostream &os) const; +}; + +struct Transaction { + bool _rollback; + std::string _begin_config; + std::string _commit_config; + + Transaction(const char *_config = NULL) : _rollback(false), + _begin_config(_config == NULL ? "" : _config), _commit_config() {} + + void describe(std::ostream &os) const { + os << "Transaction: "; + if (_rollback) + os << "(rollback) "; + os << "begin_config: " << _begin_config; + if (!_commit_config.empty()) + os << ", commit_config: " << _commit_config; + } +}; + +// To prevent silent errors, this class is set up in Python so that new +// properties are prevented, only existing properties can be set. 
+// +struct WorkloadOptions { + int max_latency; + std::string report_file; + int report_interval; + int run_time; + int sample_interval; + int sample_rate; + + WorkloadOptions(); + WorkloadOptions(const WorkloadOptions &other); + ~WorkloadOptions(); + + void describe(std::ostream &os) const { + os << "run_time " << run_time; + os << ", report_interval " << report_interval; + } + + std::string help() const { return _options.help(); } + std::string help_description(const char *option_name) const { + return _options.help_description(option_name); } + std::string help_type(const char *option_name) const { + return _options.help_type(option_name); } + +private: + OptionsList _options; +}; + +struct Workload { + WorkloadOptions options; + Stats stats; + Context *_context; + std::vector _threads; + + Workload(Context *context, const ThreadListWrapper &threadlist); + Workload(Context *context, const Thread &thread); + Workload(const Workload &other); + ~Workload(); + +#ifndef SWIG + Workload& operator=(const Workload &other); +#endif + + void describe(std::ostream &os) const { + os << "Workload: "; + _context->describe(os); + os << ", "; + options.describe(os); + os << ", [" << std::endl; + for (std::vector::const_iterator i = _threads.begin(); i != _threads.end(); i++) { + os << " "; i->describe(os); os << std::endl; + } + os << "]"; + } + int run(WT_CONNECTION *conn); +}; + +}; diff --git a/bench/workgen/workgen.swig b/bench/workgen/workgen.swig new file mode 100644 index 00000000000..0f74942169c --- /dev/null +++ b/bench/workgen/workgen.swig @@ -0,0 +1,233 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. 
+ * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * workgen.swig + * The SWIG interface file defining the workgen python API. + */ + +%include "typemaps.i" +%include "std_vector.i" +%include "std_string.i" +%include "stdint.i" +%include "attribute.i" +%include "carrays.i" + +/* We only need to reference WiredTiger types. */ +%import "wiredtiger.h" + +%{ +#include +#include +#include +#include "wiredtiger.h" +#include "workgen.h" +#include "workgen_int.h" +%} + +%pythoncode %{ +import numbers +%} + +%exception { + try { + $action + } + catch (workgen::WorkgenException &wge) { + SWIG_exception_fail(SWIG_RuntimeError, wge._str.c_str()); + } +} + +/* + * Some functions are long running, turn off signal handling that was enabled + * by the Python interpreter. This means that a signal handler coded in Python + * won't work when spanning a call to one of these long running functions, but + * it's doubtful our test scripts need signals at all. This could be made to + * work, it's just not worth the trouble. 
+ */ +%define InterruptableFunction(funcname) +%exception funcname { + try { + void (*savesig)(int) = signal(SIGINT, SIG_DFL); + $action + (void)signal(SIGINT, savesig); + } + catch (workgen::WorkgenException &wge) { + SWIG_exception_fail(SWIG_RuntimeError, wge._str.c_str()); + } +} +%enddef + +/* + * Define a __str__ function for all public workgen classes. + */ +%define WorkgenClass(classname) +%extend workgen::classname { + const std::string __str__() { + std::ostringstream out; + $self->describe(out); + return out.str(); + } +}; +%enddef + +/* + * To forestall errors, make it impossible to add new attributes to certain + * classes. This trick relies on the implementation of SWIG providing + * predictably named functions in the _workgen namespace to set attributes. + */ +%define WorkgenFrozenClass(classname) +%extend workgen::classname { +%pythoncode %{ + def __setattr__(self, attr, val): + if getattr(self, attr) == None: + raise AttributeError("'" + #classname + + "' object has no attribute '" + attr + "'") + f = _workgen.__dict__[#classname + '_' + attr + '_set'] + f(self, val) +%} +}; +%enddef + +InterruptableFunction(workgen::execute) +InterruptableFunction(workgen::Workload::run) + +%module workgen +/* Parse the header to generate wrappers. 
*/ +%include "workgen.h" + +%template(OpList) std::vector; +%template(ThreadList) std::vector; +%array_class(uint32_t, uint32Array); +%array_class(long, longArray); + +WorkgenClass(Key) +WorkgenClass(Operation) +WorkgenClass(Stats) +WorkgenClass(Table) +WorkgenClass(TableOptions) +WorkgenClass(Thread) +WorkgenClass(ThreadOptions) +WorkgenClass(Transaction) +WorkgenClass(Value) +WorkgenClass(Workload) +WorkgenClass(WorkloadOptions) +WorkgenClass(Context) + +WorkgenFrozenClass(TableOptions) +WorkgenFrozenClass(ThreadOptions) +WorkgenFrozenClass(WorkloadOptions) + +%extend workgen::Operation { +%pythoncode %{ + def __mul__(self, other): + if not isinstance(other, numbers.Integral): + raise Exception('Operation.__mul__ requires an integral number') + op = Operation() + op._group = OpList([self]) + op._repeatgroup = other + return op + + __rmul__ = __mul__ + + def __add__(self, other): + if not isinstance(other, Operation): + raise Exception('Operation.__sum__ requires an Operation') + if self._group == None or self._repeatgroup != 1 or self._transaction != None: + op = Operation() + op._group = OpList([self, other]) + op._repeatgroup = 1 + return op + else: + self._group.append(other) + return self +%} +}; + +%extend workgen::Thread { +%pythoncode %{ + def __mul__(self, other): + if not isinstance(other, numbers.Integral): + raise Exception('Thread.__mul__ requires an integral number') + return ThreadListWrapper(ThreadList([self] * other)) + + __rmul__ = __mul__ + + def __add__(self, other): + if type(self) != type(other): + raise Exception('Thread.__sum__ requires an Thread') + return ThreadListWrapper(ThreadList([self, other])) +%} +}; + +%extend workgen::ThreadListWrapper { +%pythoncode %{ + def __mul__(self, other): + if not isinstance(other, numbers.Integral): + raise Exception('ThreadList.__mul__ requires an integral number') + tlw = ThreadListWrapper(self) + tlw.multiply(other) + return tlw + + __rmul__ = __mul__ + + def __add__(self, other): + tlw = 
ThreadListWrapper(self) + if isinstance(other, ThreadListWrapper): + tlw.extend(other) + elif isinstance(other, Thread): + tlw.append(other) + else: + raise Exception('ThreadList.__sum__ requires an Thread or ThreadList') + return tlw +%} +}; + +%extend workgen::Track { +%pythoncode %{ + def __longarray(self, size): + result = longArray(size) + result.__len__ = lambda: size + return result + + def us(self): + result = self.__longarray(1000) + self._get_us(result) + return result + + def ms(self): + result = self.__longarray(1000) + self._get_ms(result) + return result + + def sec(self): + result = self.__longarray(100) + self._get_sec(result) + return result +%} +}; diff --git a/bench/workgen/workgen/__init__.py b/bench/workgen/workgen/__init__.py new file mode 100644 index 00000000000..00e8f257546 --- /dev/null +++ b/bench/workgen/workgen/__init__.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# __init__.py +# initialization for workgen module +# +import os, sys + +# After importing the SWIG-generated file, copy all symbols from from it +# to this module so they will appear in the workgen namespace. +me = sys.modules[__name__] +sys.path.append(os.path.dirname(__file__)) # needed for Python3 +import workgen, workgen_util +for module in workgen: + for name in dir(module): + value = getattr(module, name) + setattr(me, name, value) diff --git a/bench/workgen/workgen_func.c b/bench/workgen/workgen_func.c new file mode 100644 index 00000000000..6a465855875 --- /dev/null +++ b/bench/workgen/workgen_func.c @@ -0,0 +1,86 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "wiredtiger.h" +#include "test_util.h" +#include "workgen_func.h" + +/* This is an opaque type handle. */ +typedef struct workgen_random_state {} workgen_random_state; + +/* + * These functions call their WiredTiger equivalents. + */ +uint32_t +workgen_atomic_add32(uint32_t *vp, uint32_t v) +{ + return (__wt_atomic_add32(vp, v)); +} + +uint64_t +workgen_atomic_add64(uint64_t *vp, uint64_t v) +{ + return (__wt_atomic_add64(vp, v)); +} + +void +workgen_epoch(struct timespec *tsp) +{ + __wt_epoch(NULL, tsp); +} + +uint32_t +workgen_random(workgen_random_state volatile * rnd_state) +{ + return (__wt_random((WT_RAND_STATE *)rnd_state)); +} + +int +workgen_random_alloc(WT_SESSION *session, workgen_random_state **rnd_state) +{ + WT_RAND_STATE *state; + state = malloc(sizeof(WT_RAND_STATE)); + if (state == NULL) { + *rnd_state = NULL; + return (ENOMEM); + } + __wt_random_init_seed((WT_SESSION_IMPL *)session, state); + *rnd_state = (workgen_random_state *)state; + return (0); +} + +void +workgen_random_free(workgen_random_state *rnd_state) +{ + free(rnd_state); +} + +extern void +workgen_u64_to_string_zf(uint64_t n, char *buf, size_t len) +{ + u64_to_string_zf(n, buf, len); +} diff --git a/bench/workgen/workgen_func.h b/bench/workgen/workgen_func.h new file mode 100644 index 00000000000..20ebf2632cc --- /dev/null +++ b/bench/workgen/workgen_func.h @@ -0,0 +1,44 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. 
+ * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +struct workgen_random_state; + +extern uint32_t +workgen_atomic_add32(uint32_t *vp, uint32_t v); +extern uint64_t +workgen_atomic_add64(uint64_t *vp, uint64_t v); +extern void +workgen_epoch(struct timespec *tsp); +extern uint32_t +workgen_random(struct workgen_random_state volatile *rnd_state); +extern int +workgen_random_alloc(WT_SESSION *session, + struct workgen_random_state **rnd_state); +extern void +workgen_random_free(struct workgen_random_state *rnd_state); +extern void +workgen_u64_to_string_zf(uint64_t n, char *buf, size_t len); diff --git a/bench/workgen/workgen_int.h b/bench/workgen/workgen_int.h new file mode 100644 index 00000000000..01fb727691b --- /dev/null +++ b/bench/workgen/workgen_int.h @@ -0,0 +1,205 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. 
+ * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#ifndef SWIG +extern "C" { +#include "workgen_func.h" +} +#endif + +namespace workgen { + +// A 'tint' or ('table integer') is a unique small value integer +// assigned to each table URI in use. Currently, we assign it once, +// and its value persists through the lifetime of the Context. +typedef uint32_t tint_t; + +struct ThreadRunner; +struct WorkloadRunner; + +// A exception generated by the workgen classes. Methods generally return an +// int errno, so this is useful primarily for notifying the caller about +// failures in constructors. 
+struct WorkgenException { + std::string _str; + WorkgenException() : _str() {} + WorkgenException(int err, const char *msg = NULL) : _str() { + if (err != 0) + _str += wiredtiger_strerror(err); + if (msg != NULL) { + if (!_str.empty()) + _str += ": "; + _str += msg; + } + } + WorkgenException(const WorkgenException &other) : _str(other._str) {} + ~WorkgenException() {} +}; + +struct Throttle { + ThreadRunner &_runner; + double _throttle; + double _burst; + timespec _next_div; + int64_t _ops_delta; + uint64_t _ops_prev; // previously returned value + uint64_t _ops_per_div; // statically calculated. + uint64_t _ms_per_div; // statically calculated. + bool _started; + + Throttle(ThreadRunner &runner, double throttle, double burst); + ~Throttle(); + + // Called with the number of operations since the last throttle. + // Sleeps for any needed amount and returns the number operations the + // caller should perform before the next call to throttle. + int throttle(uint64_t op_count, uint64_t *op_limit); +}; + +// There is one of these per Thread object. It exists for the duration of a +// call to Workload::run() method. 
+struct ThreadRunner { + int _errno; + WorkgenException _exception; + Thread *_thread; + Context *_context; + ContextInternal *_icontext; + Workload *_workload; + WorkloadRunner *_wrunner; + workgen_random_state *_rand_state; + Throttle *_throttle; + uint64_t _throttle_ops; + uint64_t _throttle_limit; + bool _in_transaction; + uint32_t _number; + Stats _stats; + + typedef enum { + USAGE_READ = 0x1, USAGE_WRITE = 0x2, USAGE_MIXED = 0x4 } Usage; + std::map _table_usage; // value is Usage + WT_CURSOR **_cursors; // indexed by tint_t + volatile bool _stop; + WT_SESSION *_session; + char *_keybuf; + char *_valuebuf; + bool _repeat; + + ThreadRunner(); + ~ThreadRunner(); + + void free_all(); + static int cross_check(std::vector &runners); + + int close_all(); + int create_all(WT_CONNECTION *conn); + void get_static_counts(Stats &); + int open_all(); + int run(); + + void op_create_all(Operation *, size_t &keysize, size_t &valuesize); + uint64_t op_get_key_recno(Operation *, tint_t tint); + void op_get_static_counts(Operation *, Stats &, int); + int op_run(Operation *); + +#ifdef _DEBUG + std::stringstream _debug_messages; + std::string get_debug(); +#define DEBUG_CAPTURE(runner, expr) runner._debug_messages << expr +#else +#define DEBUG_CAPTURE(runner, expr) +#endif +}; + +struct Monitor { + int _errno; + WorkgenException _exception; + WorkloadRunner &_wrunner; + volatile bool _stop; + pthread_t _handle; + std::ostream *_out; + + Monitor(WorkloadRunner &wrunner); + ~Monitor(); + int run(); +}; + +struct ContextInternal { + std::map _tint; // maps uri -> tint_t + std::map _table_names; // reverse mapping + uint64_t *_recno; // # entries per tint_t + uint32_t _recno_alloced; // length of allocated _recno + tint_t _tint_last; // last tint allocated + // unique id per context, to work with multiple contexts, starts at 1. 
+ uint32_t _context_count; + + ContextInternal(); + ~ContextInternal(); + int create_all(); +}; + +struct TableInternal { + tint_t _tint; + uint32_t _context_count; + + TableInternal(); + TableInternal(const TableInternal &other); + ~TableInternal(); +}; + +// An instance of this class only exists for the duration of one call to a +// Workload::run() method. +struct WorkloadRunner { + Workload *_workload; + std::vector _trunners; + std::ostream *_report_out; + std::string _wt_home; + timespec _start; + + WorkloadRunner(Workload *); + ~WorkloadRunner(); + int run(WT_CONNECTION *conn); + +private: + int close_all(); + int create_all(WT_CONNECTION *conn, Context *context); + void final_report(timespec &); + void get_stats(Stats *stats); + int open_all(); + void open_report_file(std::ofstream &, const char *, const char *); + void report(time_t, time_t, Stats *stats); + int run_all(); + + WorkloadRunner(const WorkloadRunner &); // disallowed + WorkloadRunner& operator=(const WorkloadRunner &other); // disallowed +}; + +}; diff --git a/bench/workgen/workgen_time.h b/bench/workgen/workgen_time.h new file mode 100644 index 00000000000..f33eb64d9c9 --- /dev/null +++ b/bench/workgen/workgen_time.h @@ -0,0 +1,201 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. 
We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#define THOUSAND (1000ULL) +#define MILLION (1000000ULL) +#define BILLION (1000000000ULL) + +#define NSEC_PER_SEC BILLION +#define USEC_PER_SEC MILLION +#define MSEC_PER_SEC THOUSAND + +#define ns_to_ms(v) ((v) / MILLION) +#define ns_to_sec(v) ((v) / BILLION) +#define ns_to_us(v) ((v) / THOUSAND) + +#define us_to_ms(v) ((v) / THOUSAND) +#define us_to_ns(v) ((v) * THOUSAND) +#define us_to_sec(v) ((v) / MILLION) + +#define ms_to_ns(v) ((v) * MILLION) +#define ms_to_us(v) ((v) * THOUSAND) +#define ms_to_sec(v) ((v) / THOUSAND) + +#define sec_to_ns(v) ((v) * BILLION) +#define sec_to_us(v) ((v) * MILLION) +#define sec_to_ms(v) ((v) * THOUSAND) + +inline std::ostream& +operator<<(std::ostream &os, const timespec &ts) +{ + char oldfill; + std::streamsize oldwidth; + + os << ts.tv_sec << "."; + oldfill = os.fill('0'); + oldwidth = os.width(3); + os << (int)ns_to_ms(ts.tv_nsec); + os.fill(oldfill); + os.width(oldwidth); + return (os); +} + +inline timespec +operator-(const timespec &lhs, const timespec &rhs) +{ + timespec ts; + + if (lhs.tv_nsec < rhs.tv_nsec) { + ts.tv_sec = lhs.tv_sec - rhs.tv_sec - 1; + ts.tv_nsec = lhs.tv_nsec - rhs.tv_nsec + NSEC_PER_SEC; + } else { + ts.tv_sec = lhs.tv_sec - rhs.tv_sec; + ts.tv_nsec = lhs.tv_nsec - rhs.tv_nsec; + } + return (ts); +} + +inline timespec +operator+(const timespec &lhs, const int n) +{ + timespec ts 
= lhs; + ts.tv_sec += n; + return (ts); +} + +inline bool +operator<(const timespec &lhs, const timespec &rhs) +{ + if (lhs.tv_sec == rhs.tv_sec) + return (lhs.tv_nsec < rhs.tv_nsec); + else + return (lhs.tv_sec < rhs.tv_sec); +} + +inline bool +operator>(const timespec &lhs, const timespec &rhs) +{ + if (lhs.tv_sec == rhs.tv_sec) + return (lhs.tv_nsec > rhs.tv_nsec); + else + return (lhs.tv_sec > rhs.tv_sec); +} + +inline bool +operator>=(const timespec &lhs, const timespec &rhs) +{ + return (!(lhs < rhs)); +} + +inline bool +operator<=(const timespec &lhs, const timespec &rhs) +{ + return (!(lhs > rhs)); +} + +inline bool +operator==(const timespec &lhs, int n) +{ + return (lhs.tv_sec == n && lhs.tv_nsec == 0); +} + +inline bool +operator!=(const timespec &lhs, int n) +{ + return (lhs.tv_sec != n || lhs.tv_nsec != 0); +} + +inline timespec & +operator+=(timespec &lhs, const int n) +{ + lhs.tv_sec += n; + return (lhs); +} + +inline bool +operator==(const timespec &lhs, const timespec &rhs) +{ + return (lhs.tv_sec == rhs.tv_sec && lhs.tv_nsec == rhs.tv_nsec); +} + +inline timespec & +operator-=(timespec &lhs, const timespec &rhs) +{ + lhs.tv_sec -= rhs.tv_sec; + lhs.tv_nsec -= rhs.tv_nsec; + if (lhs.tv_nsec < 0) { + lhs.tv_nsec += NSEC_PER_SEC; + lhs.tv_sec -= 1; + } + return (lhs); +} + +inline timespec +ts_add_ms(const timespec &lhs, const uint64_t n) +{ + timespec ts; + + ts.tv_sec = lhs.tv_sec + ms_to_sec(n); + ts.tv_nsec = lhs.tv_nsec + ms_to_ns(n % THOUSAND); + while ((unsigned long)ts.tv_nsec > NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return (ts); +} + +inline void +ts_assign(timespec &lhs, const timespec &rhs) +{ + lhs.tv_sec = rhs.tv_sec; + lhs.tv_nsec = rhs.tv_nsec; +} + +inline void +ts_clear(timespec &ts) +{ + ts.tv_sec = 0; + ts.tv_nsec = 0; +} + +inline uint64_t +ts_sec(const timespec &ts) +{ + return (ns_to_sec(ts.tv_nsec) + ts.tv_sec); +} + +inline uint64_t +ts_ms(const timespec &ts) +{ + return (ns_to_ms(ts.tv_nsec) + 
sec_to_ms(ts.tv_sec)); +} + +inline uint64_t +ts_us(const timespec &ts) +{ + return (ns_to_us(ts.tv_nsec) + sec_to_us(ts.tv_sec)); +} diff --git a/build_posix/Make.subdirs b/build_posix/Make.subdirs index 4ecec37ca6c..5e30b28b3d6 100644 --- a/build_posix/Make.subdirs +++ b/build_posix/Make.subdirs @@ -45,4 +45,5 @@ test/syscall test/thread # Benchmark programs. +bench/workgen PYTHON bench/wtperf diff --git a/dist/s_string.ok b/dist/s_string.ok index ce4e9f963b0..7c409e0e46d 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -353,6 +353,8 @@ TORTIOUS TSO TXN TXNC +ThreadList +ThreadListWrapper Timespec Timestamp TryCV @@ -1194,6 +1196,7 @@ txnid txnmin txt typedef +typemaps uB uS ui @@ -1274,6 +1277,7 @@ whitespace wiredTiger wiredtiger workFactor +workgen wrapup writeable writelock diff --git a/dist/s_whitespace b/dist/s_whitespace index 0de59bc5825..874074dfb50 100755 --- a/dist/s_whitespace +++ b/dist/s_whitespace @@ -8,6 +8,7 @@ trap 'rm -f $t' 0 1 2 3 13 15 # into a single line, discard trailing empty lines. whitespace() { + ! head $1 | grep -q 'automatically generated by SWIG' || return sed -e 's/[ ][ ]*$//' < $1 | \ cat -s | \ sed -e '${' -e '/^$/d' -e '}' > $t -- cgit v1.2.1 From 0772a1377b89f6a01627dc14700fc63e6b39999a Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Fri, 28 Apr 2017 13:36:08 -0400 Subject: Revert "WT-3142 Add a workload generator application. (#3255)" (#3407) This reverts commit 48c7cf59ccf583369fa98733b388983bd4abb70e. 
--- bench/workgen/Makefile.am | 32 - bench/workgen/runner/example_simple.py | 31 - bench/workgen/runner/example_txn.py | 29 - bench/workgen/runner/insert_test.py | 94 -- bench/workgen/runner/multi_btree_heavy_stress.py | 102 -- bench/workgen/runner/runner/__init__.py | 92 -- bench/workgen/runner/runner/core.py | 101 -- bench/workgen/runner/runner/latency.py | 122 -- bench/workgen/runner/small_btree.py | 27 - bench/workgen/setup.py | 70 - bench/workgen/workgen.cxx | 1605 ---------------------- bench/workgen/workgen.h | 410 ------ bench/workgen/workgen.swig | 233 ---- bench/workgen/workgen/__init__.py | 42 - bench/workgen/workgen_func.c | 86 -- bench/workgen/workgen_func.h | 44 - bench/workgen/workgen_int.h | 205 --- bench/workgen/workgen_time.h | 201 --- build_posix/Make.subdirs | 1 - dist/s_string.ok | 4 - dist/s_whitespace | 1 - 21 files changed, 3532 deletions(-) delete mode 100644 bench/workgen/Makefile.am delete mode 100755 bench/workgen/runner/example_simple.py delete mode 100644 bench/workgen/runner/example_txn.py delete mode 100644 bench/workgen/runner/insert_test.py delete mode 100644 bench/workgen/runner/multi_btree_heavy_stress.py delete mode 100644 bench/workgen/runner/runner/__init__.py delete mode 100644 bench/workgen/runner/runner/core.py delete mode 100644 bench/workgen/runner/runner/latency.py delete mode 100644 bench/workgen/runner/small_btree.py delete mode 100644 bench/workgen/setup.py delete mode 100644 bench/workgen/workgen.cxx delete mode 100644 bench/workgen/workgen.h delete mode 100644 bench/workgen/workgen.swig delete mode 100644 bench/workgen/workgen/__init__.py delete mode 100644 bench/workgen/workgen_func.c delete mode 100644 bench/workgen/workgen_func.h delete mode 100644 bench/workgen/workgen_int.h delete mode 100644 bench/workgen/workgen_time.h diff --git a/bench/workgen/Makefile.am b/bench/workgen/Makefile.am deleted file mode 100644 index cfe8c940cee..00000000000 --- a/bench/workgen/Makefile.am +++ /dev/null @@ -1,32 +0,0 @@ 
-AM_CPPFLAGS = -I$(top_builddir) -AM_CPPFLAGS += -I$(top_srcdir)/src/include -AM_CPPFLAGS +=-I$(top_srcdir)/test/utility - -PYSRC = $(top_srcdir)/bench/workgen -PYDIRS = -t $(abs_builddir) -I $(abs_top_srcdir):$(abs_top_builddir) -L $(abs_top_builddir)/.libs -all-local: _workgen.so libworkgen.la -libworkgen_la_SOURCES = workgen.cxx workgen_func.c -noinst_LTLIBRARIES = libworkgen.la - -# We keep generated Python sources under bench/workgen. -$(PYSRC)/workgen_wrap.cxx: $(PYSRC)/workgen.h $(PYSRC)/workgen.swig - (cd $(PYSRC) && \ - $(SWIG) -c++ -python -threads -O -Wall -I$(abs_top_builddir) -outdir ./workgen workgen.swig) - -_workgen.so: $(top_builddir)/libwiredtiger.la $(PYSRC)/workgen_wrap.cxx libworkgen.la $(PYSRC)/workgen.h $(PYSRC)/workgen_time.h - (cd $(PYSRC) && \ - $(PYTHON) setup.py build_ext -f -b $(abs_builddir) $(PYDIRS)) - -install-exec-local: - (cd $(PYSRC) && \ - $(PYTHON) setup.py build_py -d $(abs_builddir)/build && \ - $(PYTHON) setup.py build_ext -f -b $(abs_builddir)/build $(PYDIRS) && \ - $(PYTHON) setup.py install_lib -b $(abs_builddir)/build --skip-build $(PYTHON_INSTALL_ARG)) - -# We build in different places for an install vs running from the tree: -# clean up both. Don't rely on "setup.py clean" -- everything that should -# be removed is created under the build directory. 
-clean-local: - rm -rf build _workgen.so workgen_wrap.o WT_TEST - -TESTS = run-ex_access diff --git a/bench/workgen/runner/example_simple.py b/bench/workgen/runner/example_simple.py deleted file mode 100755 index de944cbe29e..00000000000 --- a/bench/workgen/runner/example_simple.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/python -from runner import * -from wiredtiger import * -from workgen import * - -def show(tname): - print('') - print('<><><><> ' + tname + ' <><><><>') - c = s.open_cursor(tname, None) - for k,v in c: - print('key: ' + k) - print('value: ' + v) - print('<><><><><><><><><><><><>') - c.close() - -context = Context() -conn = wiredtiger_open("WT_TEST", "create,cache_size=1G") -s = conn.open_session() -tname = 'table:simple' -s.create(tname, 'key_format=S,value_format=S') - -ops = Operation(Operation.OP_INSERT, Table(tname), Key(Key.KEYGEN_APPEND, 10), Value(40)) -thread = Thread(ops) -workload = Workload(context, thread) -workload.run(conn) -show(tname) - -thread = Thread(ops * 5) -workload = Workload(context, thread) -workload.run(conn) -show(tname) diff --git a/bench/workgen/runner/example_txn.py b/bench/workgen/runner/example_txn.py deleted file mode 100644 index ef1d7a93941..00000000000 --- a/bench/workgen/runner/example_txn.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/python -from runner import * -from wiredtiger import * -from workgen import * - -conn = wiredtiger_open("WT_TEST", "create,cache_size=500MB") -s = conn.open_session() -tname = "table:test" -s.create(tname, 'key_format=S,value_format=S') -table = Table(tname) -table.options.key_size = 20 -table.options.value_size = 100 - -context = Context() -op = Operation(Operation.OP_INSERT, table) -thread = Thread(op * 500000) -pop_workload = Workload(context, thread) -print('populate:') -pop_workload.run(conn) - -opread = Operation(Operation.OP_SEARCH, table) -opwrite = Operation(Operation.OP_INSERT, table) -treader = Thread(opread) -twriter = Thread(txn(opwrite * 2)) -workload = 
Workload(context, treader * 8 + twriter * 2) -workload.options.run_time = 10 -workload.options.report_interval = 5 -print('transactional write workload:') -workload.run(conn) diff --git a/bench/workgen/runner/insert_test.py b/bench/workgen/runner/insert_test.py deleted file mode 100644 index 30f2818e91e..00000000000 --- a/bench/workgen/runner/insert_test.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/python -from runner import * -from wiredtiger import * -from workgen import * - -def tablename(id): - return "table:test%06d" % id - -def show(tname): - print('') - print('<><><><> ' + tname + ' <><><><>') - c = s.open_cursor(tname, None) - for k,v in c: - print('key: ' + k) - print('value: ' + v) - print('<><><><><><><><><><><><>') - c.close() - -def expectException(expr): - gotit = False - try: - expr() - except BaseException as e: - print('got expected exception: ' + str(e)) - gotit = True - if not gotit: - raise Exception("missing expected exception") - -context = Context() -conn = wiredtiger_open("WT_TEST", "create,cache_size=1G") -s = conn.open_session() -tname0 = tablename(0) -tname1 = tablename(1) -s.create(tname0, 'key_format=S,value_format=S') -s.create(tname1, 'key_format=S,value_format=S') - -ops = Operation(Operation.OP_INSERT, Table(tname0), Key(Key.KEYGEN_APPEND, 10), Value(100)) -workload = Workload(context, Thread(ops)) - -print('RUN1') -workload.run(conn) -show(tname0) - -# The context has memory of how many keys are in all the tables. -# truncate goes behind context's back, but it doesn't matter for -# an insert-only test. 
-s.truncate(tname0, None, None) - -# Show how to 'multiply' operations -op = Operation(Operation.OP_INSERT, Table(tname0), Key(Key.KEYGEN_APPEND, 10), Value(100)) -op2 = Operation(Operation.OP_INSERT, Table(tname1), Key(Key.KEYGEN_APPEND, 20), Value(30)) -o = op2 * 10 -print 'op is: ' + str(op) -print 'multiplying op is: ' + str(o) -thread0 = Thread(o + op + op) -workload = Workload(context, thread0) -print('RUN2') -workload.run(conn) -show(tname0) -show(tname1) - -s.truncate(tname0, None, None) -s.truncate(tname1, None, None) - -# operations can be multiplied, added in any combination. -op += Operation(Operation.OP_INSERT, Table(tname0), Key(Key.KEYGEN_APPEND, 10), Value(10)) -op *= 2 -op += Operation(Operation.OP_INSERT, Table(tname0), Key(Key.KEYGEN_APPEND, 10), Value(10)) -thread0 = Thread(op * 10 + op2 * 20) -workload = Workload(context, thread0) -print('RUN3') -workload.run(conn) -show(tname0) -show(tname1) - -print('workload is ' + str(workload)) -print('thread0 is ' + str(thread0)) - -def assignit(k, n): - k._size = n - -expectException(lambda: Operation( - Operation.OP_INSERT, Table('foo'), Key(Key.KEYGEN_APPEND, 10))) -# we don't catch this exception here, but in Workload.run() -k = Key(Key.KEYGEN_APPEND, 1) -assignit(k, 30) -assignit(k, 1) # we don't catch this exception here, but in Workload.run() -op = Operation(Operation.OP_INSERT, Table(tname0), k, Value(10)) -workload = Workload(context, Thread(op)) -print('RUN4') -expectException(lambda: workload.run(conn)) - -print('HELP:') -print(workload.options.help()) diff --git a/bench/workgen/runner/multi_btree_heavy_stress.py b/bench/workgen/runner/multi_btree_heavy_stress.py deleted file mode 100644 index 0993f60248d..00000000000 --- a/bench/workgen/runner/multi_btree_heavy_stress.py +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/python -# Drive a constant high workload through, even if WiredTiger isn't keeping -# up by dividing the workload across a lot of threads. 
This needs to be -# tuned to the particular machine so the workload is close to capacity in the -# steady state, but not overwhelming. -# -################ -# Note: as a proof of concept for workgen, this matches closely -# bench/wtperf/runner/multi-btree-read-heavy-stress.wtperf . -# Run time, #ops, #threads are ratcheted way down for testing. -# -from runner import * -from wiredtiger import * -from workgen import * - -def op_append(ops, op): - if ops == None: - ops = op - else: - ops += op - return ops - -def make_op(optype, table, key, value = None): - if value == None: - return Operation(optype, table, key) - else: - return Operation(optype, table, key, value) - -logkey = Key(Key.KEYGEN_APPEND, 8) ## should be 8 bytes format 'Q' -def operations(optype, tables, key, value = None, ops_per_txn = 0, logtable = None): - txn_list = [] - ops = None - nops = 0 - for table in tables: - ops = op_append(ops, make_op(optype, table, key, value)) - if logtable != None: - ops = op_append(ops, make_op(optype, logtable, logkey, value)) - nops += 1 - if ops_per_txn > 0 and nops % ops_per_txn == 0: - txn_list.append(txn(ops)) - ops = None - if ops_per_txn > 0: - if ops != None: - txn_list.append(txn(ops)) - ops = None - for t in txn_list: - ops = op_append(ops, t) - return ops - -context = Context() -## cache_size=20GB -conn_config="create,cache_size=1GB,session_max=1000,eviction=(threads_min=4,threads_max=8),log=(enabled=false),transaction_sync=(enabled=false),checkpoint_sync=true,checkpoint=(wait=60),statistics=(fast),statistics_log=(json,wait=1)" -table_config="allocation_size=4k,memory_page_max=10MB,prefix_compression=false,split_pct=90,leaf_page_max=32k,internal_page_max=16k,type=file,block_compressor=snappy" -conn_config += extensions_config(['compressors/snappy']) -conn = wiredtiger_open("WT_TEST", conn_config) -s = conn.open_session() - -tables = [] -for i in range(0, 8): - tname = "table:test" + str(i) - s.create(tname, 'key_format=S,value_format=S,' + table_config) - 
tables.append(Table(tname)) -tname = "table:log" -# TODO: use table_config for the log file? -s.create(tname, 'key_format=S,value_format=S,' + table_config) -logtable = Table(tname) - -##icount=200000000 / 8 -icount=20000 -ins_ops = operations(Operation.OP_INSERT, tables, Key(Key.KEYGEN_APPEND, 20), Value(500)) -thread = Thread(ins_ops * icount) -pop_workload = Workload(context, thread) -print('populate:') -pop_workload.run(conn) - -ins_ops = operations(Operation.OP_INSERT, tables, Key(Key.KEYGEN_APPEND, 20), Value(500), 0, logtable) -upd_ops = operations(Operation.OP_UPDATE, tables, Key(Key.KEYGEN_UNIFORM, 20), Value(500), 0, logtable) -read_ops = operations(Operation.OP_SEARCH, tables, Key(Key.KEYGEN_UNIFORM, 20), None, 3) - -ins_thread = Thread(ins_ops) -upd_thread = Thread(upd_ops) -read_thread = Thread(read_ops) -ins_thread.options.throttle = 250 -ins_thread.options.name = "Insert" -upd_thread.options.throttle = 250 -upd_thread.options.name = "Update" -read_thread.options.throttle = 1000 -read_thread.options.name = "Read" -##threads = [ins_thread] * 10 + [upd_thread] * 10 + [read_thread] * 80 -threads = ins_thread * 1 + upd_thread * 1 + read_thread * 2 -workload = Workload(context, threads) -##workload.options.run_time = 3600 -workload.options.run_time = 30 -workload.options.report_interval = 1 -workload.options.sample_interval = 5 -workload.options.sample_rate = 1 -print('heavy stress workload:') -workload.run(conn) - -latency_filename = conn.get_home() + '/latency.out' -print('for latency output, see: ' + latency_filename) -latency.workload_latency(workload, latency_filename) diff --git a/bench/workgen/runner/runner/__init__.py b/bench/workgen/runner/runner/__init__.py deleted file mode 100644 index 67b547bc51b..00000000000 --- a/bench/workgen/runner/runner/__init__.py +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-2017 MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. 
-# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# runner/__init__.py -# Used as a first import by runners, does any common initialization. -from __future__ import print_function - -import os, shutil, sys -thisdir = os.path.dirname(os.path.abspath(__file__)) -workgen_src = os.path.dirname(os.path.dirname(thisdir)) -wt_dir = os.path.dirname(os.path.dirname(workgen_src)) -wt_builddir = os.path.join(wt_dir, 'build_posix') - -def _prepend_env_path(pathvar, s): - last = '' - try: - last = ':' + os.environ[pathvar] - except: - pass - os.environ[pathvar] = s + last - -# Initialize the python path so needed modules can be imported. -# If the path already works, don't change it. 
-try: - import wiredtiger -except: - # We'll try hard to make the importing work, we'd like to runners - # to be executable directly without having to set environment variables. - sys.path.insert(0, os.path.join(wt_dir, 'lang', 'python')) - sys.path.insert(0, os.path.join(wt_builddir, 'lang', 'python')) - try: - import wiredtiger - except: - # If the .libs directory is not in our library search path, - # we need to set it and retry. However, the dynamic link - # library has already cached its value, our only option is - # to restart the Python interpreter. - if '_workgen_init' not in os.environ: - os.environ['_workgen_init'] = 'true' - dotlibs = os.path.join(wt_builddir, '.libs') - _prepend_env_path('LD_LIBRARY_PATH', dotlibs) - _prepend_env_path('DYLD_LIBRARY_PATH', dotlibs) - py_args = sys.argv - py_args.insert(0, sys.executable) - try: - os.execv(sys.executable, py_args) - except Exception, exception: - print('re-exec failed: ' + str(exception), file=sys.stderr) - print(' exec(' + sys.executable + ', ' + str(py_args) + ')') - print('Try adding "' + dotlibs + '" to the', file=sys.stderr) - print('LD_LIBRARY_PATH environment variable before running ' + \ - 'this program again.', file=sys.stderr) - sys.exit(1) - -try: - import workgen -except: - sys.path.insert(0, os.path.join(workgen_src, 'workgen')) - sys.path.insert(0, os.path.join(wt_builddir, 'bench', 'workgen')) - import workgen - -# Clear out the WT_TEST directory. -shutil.rmtree('WT_TEST', True) -os.mkdir('WT_TEST') - -from .core import txn, extensions_config -from .latency import workload_latency diff --git a/bench/workgen/runner/runner/core.py b/bench/workgen/runner/runner/core.py deleted file mode 100644 index a0f0d4d77cd..00000000000 --- a/bench/workgen/runner/runner/core.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-2017 MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. -# -# This is free and unencumbered software released into the public domain. 
-# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# runner/core.py -# Core functions available to all runners -import glob, os -import workgen - -# txn -- -# Put the operation (and any suboperations) within a transaction. -def txn(op, config=None): - t = workgen.Transaction(config) - op._transaction = t - return op - -# Check for a local build that contains the wt utility. First check in -# current working directory, then in build_posix and finally in the disttop -# directory. This isn't ideal - if a user has multiple builds in a tree we -# could pick the wrong one. -def _wiredtiger_builddir(): - if os.path.isfile(os.path.join(os.getcwd(), 'wt')): - return os.getcwd() - - # The directory of this file should be within the distribution tree. 
- thisdir = os.path.dirname(os.path.abspath(__file__)) - wt_disttop = os.path.join(\ - thisdir, os.pardir, os.pardir, os.pardir, os.pardir) - if os.path.isfile(os.path.join(wt_disttop, 'wt')): - return wt_disttop - if os.path.isfile(os.path.join(wt_disttop, 'build_posix', 'wt')): - return os.path.join(wt_disttop, 'build_posix') - if os.path.isfile(os.path.join(wt_disttop, 'wt.exe')): - return wt_disttop - raise Exception('Unable to find useable WiredTiger build') - -# Return the wiredtiger_open extension argument for any needed shared library. -# Called with a list of extensions, e.g. -# [ 'compressors/snappy', 'encryptors/rotn=config_string' ] -def extensions_config(exts): - result = '' - extfiles = {} - errpfx = 'extensions_config' - builddir = _wiredtiger_builddir() - for ext in exts: - extconf = '' - if '=' in ext: - splits = ext.split('=', 1) - ext = splits[0] - extconf = '=' + splits[1] - splits = ext.split('/') - if len(splits) != 2: - raise Exception(errpfx + ": " + ext + - ": extension is not named /") - libname = splits[1] - dirname = splits[0] - pat = os.path.join(builddir, 'ext', - dirname, libname, '.libs', 'libwiredtiger_*.so') - filenames = glob.glob(pat) - if len(filenames) == 0: - raise Exception(errpfx + - ": " + ext + - ": no extensions library found matching: " + pat) - elif len(filenames) > 1: - raise Exception(errpfx + ": " + ext + - ": multiple extensions libraries found matching: " + pat) - complete = '"' + filenames[0] + '"' + extconf - if ext in extfiles: - if extfiles[ext] != complete: - raise Exception(errpfx + - ": non-matching extension arguments in " + - str(exts)) - else: - extfiles[ext] = complete - if len(extfiles) != 0: - result = ',extensions=[' + ','.join(extfiles.values()) + ']' - return result diff --git a/bench/workgen/runner/runner/latency.py b/bench/workgen/runner/runner/latency.py deleted file mode 100644 index 46d9be9bad8..00000000000 --- a/bench/workgen/runner/runner/latency.py +++ /dev/null @@ -1,122 +0,0 @@ 
-#!/usr/bin/env python -# -# Public Domain 2014-2016 MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. 
-# -# runner/latency.py -# Utility functions for showing latency statistics -from __future__ import print_function -import sys - -def _show_buckets(fh, title, mult, buckets, n): - shown = False - s = title + ': ' - for count in range(0, n): - val = buckets[count] - if val != 0: - if shown: - s += ',' - s += str(count*mult) + '=' + str(val) - shown = True - print(s, file=fh) - -def _latency_preprocess(arr, merge): - mx = 0 - cur = 0 - # SWIG arrays have a clunky interface - for i in range(0, arr.__len__()): - if i % merge == 0: - cur = 0 - cur += arr[i] - if cur > mx: - mx = cur - arr.height = mx - -def _latency_plot(box, ch, left, width, arr, merge, scale): - pos = 0 - for x in range(0, width): - t = 0 - for i in range(0, merge): - t += arr[pos] - pos += 1 - nch = scale * t - y = 0 - while nch > 0.0: - box[y][left + x] = ch - nch -= 1.0 - y += 1 - -def _latency_optype(fh, name, ch, t): - if t.ops == 0: - return - if t.latency_ops == 0: - print('**** ' + name + ' operations: ' + str(t.ops), file=fh) - return - print('**** ' + name + ' operations: ' + str(t.ops) + \ - ', latency operations: ' + str(t.latency_ops), file=fh) - print(' avg: ' + str(t.latency/t.latency_ops) + \ - ', min: ' + str(t.min_latency) + ', max: ' + str(t.max_latency), - file=fh) - us = t.us() - ms = t.ms() - sec = t.sec() - _latency_preprocess(us, 40) - _latency_preprocess(ms, 40) - _latency_preprocess(sec, 4) - max_height = max(us.height, ms.height, sec.height) - if max_height == 0: - return - height = 20 # 20 chars high - # a list of a list of characters - box = [list(' ' * 80) for x in range(height)] - scale = (1.0 / (max_height + 1)) * height - _latency_plot(box, ch, 0, 25, us, 40, scale) - _latency_plot(box, ch, 27, 25, ms, 40, scale) - _latency_plot(box, ch, 54, 25, sec, 4, scale) - box.reverse() - for line in box: - print(''.join(line), file=fh) - dash25 = '-' * 25 - print(' '.join([dash25] * 3), file=fh) - print(' 0 - 999 us (40/bucket) 1 - 999 ms (40/bucket) ' + \ - '1 - 99 sec 
(4/bucket)', file=fh) - print('', file=fh) - _show_buckets(fh, name + ' us', 1, us, 1000) - _show_buckets(fh, name + ' ms', 1000, ms, 1000) - _show_buckets(fh, name + ' sec', 1000000, sec, 100) - print('', file=fh) - -def workload_latency(workload, outfilename = None): - if outfilename: - fh = open(outfilename, 'w') - else: - fh = sys.stdout - _latency_optype(fh, 'insert', 'I', workload.stats.insert) - _latency_optype(fh, 'read', 'R', workload.stats.read) - _latency_optype(fh, 'remove', 'X', workload.stats.remove) - _latency_optype(fh, 'update', 'U', workload.stats.update) - _latency_optype(fh, 'truncate', 'T', workload.stats.truncate) - _latency_optype(fh, 'not found', 'N', workload.stats.not_found) diff --git a/bench/workgen/runner/small_btree.py b/bench/workgen/runner/small_btree.py deleted file mode 100644 index d70f0d9e693..00000000000 --- a/bench/workgen/runner/small_btree.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/python -from runner import * -from wiredtiger import * -from workgen import * - -context = Context() -conn = wiredtiger_open("WT_TEST", "create,cache_size=500MB") -s = conn.open_session() -tname = "file:test.wt" -s.create(tname, 'key_format=S,value_format=S') -table = Table(tname) -table.options.key_size = 20 -table.options.value_size = 100 - -op = Operation(Operation.OP_INSERT, table) -thread = Thread(op * 500000) -pop_workload = Workload(context, thread) -print('populate:') -pop_workload.run(conn) - -op = Operation(Operation.OP_SEARCH, table) -t = Thread(op) -workload = Workload(context, t * 8) -workload.options.run_time = 120 -workload.options.report_interval = 5 -print('read workload:') -workload.run(conn) diff --git a/bench/workgen/setup.py b/bench/workgen/setup.py deleted file mode 100644 index 79d3fc4297c..00000000000 --- a/bench/workgen/setup.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-2017 MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. 
-# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# - -from __future__ import print_function -import re, os, sys -from distutils.core import setup, Extension - -# OS X hack: turn off the Universal binary support that is built into the -# Python build machinery, just build for the default CPU architecture. 
-if not 'ARCHFLAGS' in os.environ: - os.environ['ARCHFLAGS'] = '' - -# Suppress warnings building SWIG generated code -extra_cflags = [ '-Wmissing-field-initializers', '-Wextra', '-Wno-shadow', '-I../../src/include', '-I../../test/utility'] - -dir = os.path.dirname(__file__) -abs_dir = os.path.dirname(os.path.abspath(__file__)) - -if abs_dir.endswith(os.sep + os.path.join('bench', 'workgen')): - wt_dir = os.path.dirname(os.path.dirname(abs_dir)) -else: - print(os.path.basename(__file__) + ": running from unknown dir", file=sys.stderr) - sys.exit(1) - -build_dir = os.path.join(wt_dir, 'build_posix') - -# Read the version information from the RELEASE_INFO file -for l in open(os.path.join(dir, '..', '..', 'RELEASE_INFO')): - if re.match(r'WIREDTIGER_VERSION_(?:MAJOR|MINOR|PATCH)=', l): - exec(l) - -wt_ver = '%d.%d' % (WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR) - -setup(name='workgen', version=wt_ver, - ext_modules=[Extension('_workgen', - [os.path.join(dir, 'workgen_wrap.cxx')], - libraries=['wiredtiger', 'pthread'], - extra_objects = [ os.path.join(build_dir, 'bench', 'workgen', \ - '.libs', 'libworkgen.a') ], - extra_compile_args=extra_cflags, - )], - package_dir={'' : dir}, - packages=['workgen'], -) diff --git a/bench/workgen/workgen.cxx b/bench/workgen/workgen.cxx deleted file mode 100644 index c56acfd2989..00000000000 --- a/bench/workgen/workgen.cxx +++ /dev/null @@ -1,1605 +0,0 @@ -/*- - * Public Domain 2014-2017 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. - * - * This is free and unencumbered software released into the public domain. - * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. - * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. 
We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#define __STDC_LIMIT_MACROS // needed to get UINT64_MAX in C++ -#include -#include -#include -#include -#include "wiredtiger.h" -#include "workgen.h" -#include "workgen_int.h" -#include "workgen_time.h" -extern "C" { -// Include some specific WT files, as some files included by wt_internal.h -// have some C-ism's that don't work in C++. -#include -#include -#include -#include -#include -#include -#include -#include "error.h" -#include "misc.h" -} - -#define LATENCY_US_BUCKETS 1000 -#define LATENCY_MS_BUCKETS 1000 -#define LATENCY_SEC_BUCKETS 100 - -#define THROTTLE_PER_SEC 20 // times per sec we will throttle - -#define MIN(a, b) ((a) < (b) ? (a) : (b)) -#define MAX(a, b) ((a) < (b) ? (b) : (a)) -#define TIMESPEC_DOUBLE(ts) ((double)(ts).tv_sec + ts.tv_nsec * 0.000000001) -#define PCT(n, total) ((total) == 0 ? 0 : ((n) * 100) / (total)) -#define OPS_PER_SEC(ops, ts) (int) ((ts) == 0 ? 0.0 : \ - (ops) / TIMESPEC_DOUBLE(ts)) - -// Get the value of a STL container, even if it is not present -#define CONTAINER_VALUE(container, idx, dfault) \ - (((container).count(idx) > 0) ? 
(container)[idx] : (dfault)) - -#define CROSS_USAGE(a, b) \ - (((a & USAGE_READ) != 0 && (b & USAGE_WRITE) != 0) || \ - ((a & USAGE_WRITE) != 0 && (b & USAGE_READ) != 0)) - -#define ASSERT(cond) \ - do { \ - if (!(cond)) { \ - fprintf(stderr, "%s:%d: ASSERT failed: %s\n", \ - __FILE__, __LINE__, #cond); \ - abort(); \ - } \ - } while(0) - -#define THROW_ERRNO(e, args) \ - do { \ - std::stringstream __sstm; \ - __sstm << args; \ - WorkgenException __wge(e, __sstm.str().c_str()); \ - throw(__wge); \ - } while(0) - -#define THROW(args) THROW_ERRNO(0, args) - -#define VERBOSE(runner, args) \ - do { \ - if ((runner)._context->_verbose) \ - std::cout << args << std::endl; \ - } while(0) - -#define OP_HAS_VALUE(op) \ - ((op)->_optype == Operation::OP_INSERT || \ - (op)->_optype == Operation::OP_UPDATE) - -namespace workgen { - -// The number of contexts. Normally there is one context created, but it will -// be possible to use several eventually. More than one is not yet -// implemented, but we must at least guard against the caller creating more -// than one. -static uint32_t context_count = 0; - -static void *thread_runner_main(void *arg) { - ThreadRunner *runner = (ThreadRunner *)arg; - try { - runner->_errno = runner->run(); - } catch (WorkgenException &wge) { - runner->_exception = wge; - } - return (NULL); -} - -static void *monitor_main(void *arg) { - Monitor *monitor = (Monitor *)arg; - try { - monitor->_errno = monitor->run(); - } catch (WorkgenException &wge) { - monitor->_exception = wge; - } - return (NULL); -} - -// Exponentiate (like the pow function), except that it returns an exact -// integral 64 bit value, and if it overflows, returns the maximum possible -// value for the return type. 
-static uint64_t power64(int base, int exp) { - uint64_t last, result; - - result = 1; - for (int i = 0; i < exp; i++) { - last = result; - result *= base; - if (result < last) - return UINT64_MAX; - } - return result; -} - -OptionsList::OptionsList() : _option_map() {} -OptionsList::OptionsList(const OptionsList &other) : - _option_map(other._option_map) {} - -void OptionsList::add_option(const char *name, const std::string typestr, - const char *desc) { - TypeDescPair pair(typestr, desc); - _option_map[name] = pair; -} - -void OptionsList::add_int(const char *name, int default_value, - const char *desc) { - std::stringstream sstm; - sstm << "int, default=" << default_value; - add_option(name, sstm.str(), desc); -} - -void OptionsList::add_bool(const char *name, bool default_value, - const char *desc) { - std::stringstream sstm; - sstm << "boolean, default=" << (default_value ? "true" : "false"); - add_option(name, sstm.str(), desc); -} - -void OptionsList::add_double(const char *name, double default_value, - const char *desc) { - std::stringstream sstm; - sstm << "double, default=" << default_value; - add_option(name, sstm.str(), desc); -} - -void OptionsList::add_string(const char *name, - const std::string &default_value, const char *desc) { - std::stringstream sstm; - sstm << "string, default=\"" << default_value << "\""; - add_option(name, sstm.str(), desc); -} - -static void -pretty_print(const char *p, const char *indent, std::stringstream &sstm) -{ - const char *t; - - for (;; p = t + 1) { - if (strlen(p) <= 70) - break; - for (t = p + 70; t > p && *t != ' '; --t) - ; - if (t == p) /* No spaces? 
*/ - break; - if (indent != NULL) - sstm << indent; - std::string line(p, (size_t)(t - p)); - sstm << line << std::endl; - } - if (*p != '\0') { - if (indent != NULL) - sstm << indent; - sstm << p << std::endl; - } -} - -std::string OptionsList::help() const { - std::stringstream sstm; - for (std::map::const_iterator i = - _option_map.begin(); i != _option_map.end(); i++) { - sstm << i->first << " (" << i->second.first << ")" << std::endl; - pretty_print(i->second.second.c_str(), "\t", sstm); - } - return sstm.str(); -} - -std::string OptionsList::help_description(const char *option_name) const { - const std::string key(option_name); - if (_option_map.count(key) == 0) - return (std::string("")); - else - return (_option_map.find(key)->second.second); -} - -std::string OptionsList::help_type(const char *option_name) const { - const std::string key(option_name); - if (_option_map.count(key) == 0) - return std::string(""); - else - return (_option_map.find(key)->second.first); -} - -Context::Context() : _verbose(false), _internal(new ContextInternal()) {} -Context::~Context() { delete _internal; } -Context& Context::operator=(const Context &other) { - _verbose = other._verbose; - *_internal = *other._internal; - return (*this); -} - -ContextInternal::ContextInternal() : _tint(), _table_names(), - _recno(NULL), _recno_alloced(0), _tint_last(0), _context_count(0) { - uint32_t count; - if ((count = workgen_atomic_add32(&context_count, 1)) != 1) - THROW("multiple Contexts not supported"); - _context_count = count; -} - -ContextInternal::~ContextInternal() { - if (_recno != NULL) - delete _recno; -} - -int ContextInternal::create_all() { - if (_recno_alloced != _tint_last) { - // The array references are 1-based, we'll waste one entry. 
- uint64_t *new_recno = new uint64_t[_tint_last + 1]; - memcpy(new_recno, _recno, sizeof(uint64_t) * _recno_alloced); - memset(&new_recno[_recno_alloced], 0, - sizeof(uint64_t) * (_tint_last - _recno_alloced + 1)); - delete _recno; - _recno = new_recno; - _recno_alloced = _tint_last; - } - return (0); -} - -Monitor::Monitor(WorkloadRunner &wrunner) : - _errno(0), _exception(), _wrunner(wrunner), _stop(false), _handle() {} -Monitor::~Monitor() {} - -int Monitor::run() { - struct timespec t; - struct tm *tm, _tm; - char time_buf[64]; - Stats prev_totals; - WorkloadOptions *options = &_wrunner._workload->options; - uint64_t latency_max = (uint64_t)options->max_latency; - - (*_out) << "#time," - << "totalsec," - << "read ops per second," - << "insert ops per second," - << "update ops per second," - << "checkpoints," - << "read average latency(uS)," - << "read minimum latency(uS)," - << "read maximum latency(uS)," - << "insert average latency(uS)," - << "insert min latency(uS)," - << "insert maximum latency(uS)," - << "update average latency(uS)," - << "update min latency(uS)," - << "update maximum latency(uS)" - << std::endl; - - Stats prev_interval; - while (!_stop) { - for (int i = 0; i < options->sample_interval && !_stop; i++) - sleep(1); - if (_stop) - break; - - workgen_epoch(&t); - tm = localtime_r(&t.tv_sec, &_tm); - (void)strftime(time_buf, sizeof(time_buf), "%b %d %H:%M:%S", tm); - - Stats new_totals(true); - for (std::vector::iterator tr = - _wrunner._trunners.begin(); tr != _wrunner._trunners.end(); tr++) - new_totals.add(tr->_stats, true); - Stats interval(new_totals); - interval.subtract(prev_totals); - interval.smooth(prev_interval); - - int interval_secs = options->sample_interval; - uint64_t cur_reads = interval.read.ops / interval_secs; - uint64_t cur_inserts = interval.insert.ops / interval_secs; - uint64_t cur_updates = interval.update.ops / interval_secs; - - uint64_t totalsec = ts_sec(t - _wrunner._start); - (*_out) << time_buf - << "," << 
totalsec - << "," << cur_reads - << "," << cur_inserts - << "," << cur_updates - << "," << 'N' // checkpoint in progress - << "," << interval.read.average_latency() - << "," << interval.read.min_latency - << "," << interval.read.max_latency - << "," << interval.insert.average_latency() - << "," << interval.insert.min_latency - << "," << interval.insert.max_latency - << "," << interval.update.average_latency() - << "," << interval.update.min_latency - << "," << interval.update.max_latency - << std::endl; - - uint64_t read_max = interval.read.max_latency; - uint64_t insert_max = interval.read.max_latency; - uint64_t update_max = interval.read.max_latency; - - if (latency_max != 0 && - (read_max > latency_max || insert_max > latency_max || - update_max > latency_max)) { - std::cerr << "WARNING: max latency exceeded:" - << " threshold " << latency_max - << " read max " << read_max - << " insert max " << insert_max - << " update max " << update_max << std::endl; - } - - prev_interval.assign(interval); - prev_totals.assign(new_totals); - } - return (0); -} - -ThreadRunner::ThreadRunner() : - _errno(0), _exception(), _thread(NULL), _context(NULL), _icontext(NULL), - _workload(NULL), _wrunner(NULL), _rand_state(NULL), - _throttle(NULL), _throttle_ops(0), _throttle_limit(0), - _in_transaction(false), _number(0), _stats(false), _table_usage(), - _cursors(NULL), _stop(false), _session(NULL), _keybuf(NULL), - _valuebuf(NULL), _repeat(false) { -} - -ThreadRunner::~ThreadRunner() { - free_all(); -} - -int ThreadRunner::create_all(WT_CONNECTION *conn) { - size_t keysize, valuesize; - - WT_RET(close_all()); - ASSERT(_session == NULL); - WT_RET(conn->open_session(conn, NULL, NULL, &_session)); - _table_usage.clear(); - _stats.track_latency(_workload->options.sample_interval > 0); - WT_RET(workgen_random_alloc(_session, &_rand_state)); - _throttle_ops = 0; - _throttle_limit = 0; - _in_transaction = 0; - keysize = 1; - valuesize = 1; - op_create_all(&_thread->_op, keysize, 
valuesize); - _keybuf = new char[keysize]; - _valuebuf = new char[valuesize]; - _keybuf[keysize - 1] = '\0'; - _valuebuf[valuesize - 1] = '\0'; - return (0); -} - -int ThreadRunner::open_all() { - typedef WT_CURSOR *WT_CURSOR_PTR; - if (_cursors != NULL) - delete _cursors; - _cursors = new WT_CURSOR_PTR[_icontext->_tint_last + 1]; - memset(_cursors, 0, sizeof (WT_CURSOR *) * (_icontext->_tint_last + 1)); - for (std::map::iterator i = _table_usage.begin(); - i != _table_usage.end(); i++) { - uint32_t tindex = i->first; - const char *uri = _icontext->_table_names[tindex].c_str(); - WT_RET(_session->open_cursor(_session, uri, NULL, NULL, - &_cursors[tindex])); - } - return (0); -} - -int ThreadRunner::close_all() { - if (_throttle != NULL) { - delete _throttle; - _throttle = NULL; - } - if (_session != NULL) { - WT_RET(_session->close(_session, NULL)); - _session = NULL; - } - free_all(); - return (0); -} - -void ThreadRunner::free_all() { - if (_rand_state != NULL) { - workgen_random_free(_rand_state); - _rand_state = NULL; - } - if (_cursors != NULL) { - delete _cursors; - _cursors = NULL; - } - if (_keybuf != NULL) { - delete _keybuf; - _keybuf = NULL; - } - if (_valuebuf != NULL) { - delete _valuebuf; - _valuebuf = NULL; - } -} - -int ThreadRunner::cross_check(std::vector &runners) { - std::map usage; - - // Determine which tables have cross usage - for (std::vector::iterator r = runners.begin(); - r != runners.end(); r++) { - for (std::map::iterator i = r->_table_usage.begin(); - i != r->_table_usage.end(); i++) { - uint32_t tindex = i->first; - uint32_t thisusage = i->second; - uint32_t curusage = CONTAINER_VALUE(usage, tindex, 0); - if (CROSS_USAGE(curusage, thisusage)) - curusage |= USAGE_MIXED; - usage[tindex] = curusage; - } - } - for (std::map::iterator i = usage.begin(); - i != usage.end(); i++) { - if ((i->second & USAGE_MIXED) != 0) { - for (std::vector::iterator r = runners.begin(); - r != runners.end(); r++) { - r->_table_usage[i->first] |= 
USAGE_MIXED; - } - } - } - return (0); -} - -int ThreadRunner::run() { - WT_DECL_RET; - ThreadOptions *options = &_thread->options; - std::string name = options->name; - - VERBOSE(*this, "thread " << name << " running"); - if (options->throttle != 0) { - _throttle = new Throttle(*this, options->throttle, - options->throttle_burst); - } - for (int cnt = 0; !_stop && (_repeat || cnt < 1) && ret == 0; cnt++) - WT_ERR(op_run(&_thread->_op)); - -err: -#ifdef _DEBUG - { - std::string messages = this->get_debug(); - if (!messages.empty()) - std::cerr << "DEBUG (thread " << name << "): " - << messages << std::endl; - } -#endif - if (ret != 0) - std::cerr << "thread " << name << " failed err=" << ret << std::endl; - VERBOSE(*this, "thread " << name << "finished"); - return (ret); -} - -void ThreadRunner::get_static_counts(Stats &stats) { - _thread->_op.get_static_counts(stats, 1); -} - -void ThreadRunner::op_create_all(Operation *op, size_t &keysize, - size_t &valuesize) { - tint_t tint; - - op->size_check(); - if (op->_optype != Operation::OP_NONE) { - op->kv_compute_max(true); - if (OP_HAS_VALUE(op)) - op->kv_compute_max(false); - op->kv_size_buffer(true, keysize); - op->kv_size_buffer(false, valuesize); - - // Note: to support multiple contexts we'd need a generation - // count whenever we execute. - if (op->_table._internal->_context_count != 0 && - op->_table._internal->_context_count != _icontext->_context_count) - THROW("multiple Contexts not supported"); - if ((tint = op->_table._internal->_tint) == 0) { - std::string uri = op->_table._uri; - - // We are single threaded in this function, so do not have - // to worry about locking. - if (_icontext->_tint.count(uri) == 0) { - // TODO: don't use atomic add, it's overkill. 
- tint = workgen_atomic_add32(&_icontext->_tint_last, 1); - _icontext->_tint[uri] = tint; - _icontext->_table_names[tint] = uri; - } else - tint = _icontext->_tint[uri]; - op->_table._internal->_tint = tint; - } - uint32_t usage_flags = CONTAINER_VALUE(_table_usage, - op->_table._internal->_tint, 0); - if (op->_optype == Operation::OP_SEARCH) - usage_flags |= ThreadRunner::USAGE_READ; - else - usage_flags |= ThreadRunner::USAGE_WRITE; - _table_usage[op->_table._internal->_tint] = usage_flags; - } - if (op->_group != NULL) - for (std::vector::iterator i = op->_group->begin(); - i != op->_group->end(); i++) - op_create_all(&*i, keysize, valuesize); -} - -uint64_t ThreadRunner::op_get_key_recno(Operation *op, tint_t tint) { - uint64_t recno_count; - uint32_t rand; - - recno_count = _icontext->_recno[tint]; - if (recno_count == 0) - // The file has no entries, returning 0 forces a WT_NOTFOUND return. - return (0); - rand = workgen_random(_rand_state); - return (rand % recno_count + 1); // recnos are one-based. -} - -int ThreadRunner::op_run(Operation *op) { - Track *track; - tint_t tint = op->_table._internal->_tint; - WT_CURSOR *cursor = _cursors[tint]; - WT_DECL_RET; - uint64_t recno; - bool measure_latency; - - recno = 0; - track = NULL; - if (_throttle != NULL) { - if (_throttle_ops >= _throttle_limit && !_in_transaction) { - WT_ERR(_throttle->throttle(_throttle_ops, - &_throttle_limit)); - _throttle_ops = 0; - } - if (op->_optype != Operation::OP_NONE) - ++_throttle_ops; - } - - // A potential race: thread1 is inserting, and increments - // Context->_recno[] for fileX.wt. thread2 is doing one of - // remove/search/update and grabs the new value of Context->_recno[] - // for fileX.wt. thread2 randomly chooses the highest recno (which - // has not yet been inserted by thread1), and when it accesses - // the record will get WT_NOTFOUND. It should be somewhat rare - // (and most likely when the threads are first beginning). 
Any - // WT_NOTFOUND returns are allowed and get their own statistic bumped. - switch (op->_optype) { - case Operation::OP_INSERT: - track = &_stats.insert; - recno = workgen_atomic_add64(&_icontext->_recno[tint], 1); - break; - case Operation::OP_REMOVE: - track = &_stats.remove; - recno = op_get_key_recno(op, tint); - break; - case Operation::OP_SEARCH: - track = &_stats.read; - recno = op_get_key_recno(op, tint); - break; - case Operation::OP_UPDATE: - track = &_stats.update; - recno = op_get_key_recno(op, tint); - break; - case Operation::OP_NONE: - recno = 0; - break; - } - - measure_latency = track != NULL && track->ops != 0 && - track->track_latency() && - (track->ops % _workload->options.sample_rate == 0); - - timespec start; - if (measure_latency) - workgen_epoch(&start); - - if (op->_transaction != NULL) { - if (_in_transaction) - THROW("nested transactions not supported"); - _session->begin_transaction(_session, - op->_transaction->_begin_config.c_str()); - _in_transaction = true; - } - if (op->_optype != Operation::OP_NONE) { - op->kv_gen(true, recno, _keybuf); - cursor->set_key(cursor, _keybuf); - if (OP_HAS_VALUE(op)) { - op->kv_gen(false, recno, _valuebuf); - cursor->set_value(cursor, _valuebuf); - } - switch (op->_optype) { - case Operation::OP_INSERT: - WT_ERR(cursor->insert(cursor)); - break; - case Operation::OP_REMOVE: - WT_ERR_NOTFOUND_OK(cursor->remove(cursor)); - break; - case Operation::OP_SEARCH: - ret = cursor->search(cursor); - break; - case Operation::OP_UPDATE: - WT_ERR_NOTFOUND_OK(cursor->update(cursor)); - break; - default: - ASSERT(false); - } - if (ret != 0) { - track = &_stats.not_found; - ret = 0; // WT_NOTFOUND allowed. 
- } - cursor->reset(cursor); - } - if (measure_latency) { - timespec stop; - workgen_epoch(&stop); - track->incr_with_latency(ts_us(stop - start)); - } else if (track != NULL) - track->incr(); - - if (op->_group != NULL) - for (int count = 0; !_stop && count < op->_repeatgroup; count++) - for (std::vector::iterator i = op->_group->begin(); - i != op->_group->end(); i++) - WT_ERR(op_run(&*i)); -err: - if (op->_transaction != NULL) { - if (ret != 0 || op->_transaction->_rollback) - WT_TRET(_session->rollback_transaction(_session, NULL)); - else - ret = _session->commit_transaction(_session, - op->_transaction->_commit_config.c_str()); - _in_transaction = false; - } - return (ret); -} - -#ifdef _DEBUG -std::string ThreadRunner::get_debug() { - return (_debug_messages.str()); -} -#endif - -Throttle::Throttle(ThreadRunner &runner, double throttle, - double throttle_burst) : _runner(runner), _throttle(throttle), - _burst(throttle_burst), _next_div(), _ops_delta(0), _ops_prev(0), - _ops_per_div(0), _ms_per_div(0), _started(false) { - ts_clear(_next_div); - _ms_per_div = ceill(1000.0 / THROTTLE_PER_SEC); - _ops_per_div = ceill(_throttle / THROTTLE_PER_SEC); -} - -Throttle::~Throttle() {} - -// Given a random 32-bit value, return a float value equally distributed -// between -1.0 and 1.0. -static float rand_signed(uint32_t r) { - int sign = ((r & 0x1) == 0 ? 1 : -1); - return (((float)r * sign) / UINT32_MAX); -} - -// Each time throttle is called, we sleep and return a number of operations to -// perform next. To implement this we keep a time calculation in _next_div set -// initially to the current time + 1/THROTTLE_PER_SEC. Each call to throttle -// advances _next_div by 1/THROTTLE_PER_SEC, and if _next_div is in the future, -// we sleep for the difference between the _next_div and the current_time. We -// always return (Thread.options.throttle / THROTTLE_PER_SEC) as the number of -// operations. 
-// -// The only variation is that the amount of individual sleeps is modified by a -// random amount (which varies more widely as Thread.options.throttle_burst is -// greater). This has the effect of randomizing how much clumping happens, and -// ensures that multiple threads aren't executing in lock step. -// -int Throttle::throttle(uint64_t op_count, uint64_t *op_limit) { - uint64_t ops; - int64_t sleep_ms; - timespec now; - - workgen_epoch(&now); - DEBUG_CAPTURE(_runner, "throttle: ops=" << op_count); - if (!_started) { - _next_div = ts_add_ms(now, _ms_per_div); - _started = true; - } else { - _ops_delta += (op_count - _ops_prev); - if (now < _next_div) { - sleep_ms = ts_ms(_next_div - now); - sleep_ms += (_ms_per_div * _burst * - rand_signed(workgen_random(_runner._rand_state))); - if (sleep_ms > 0) { - DEBUG_CAPTURE(_runner, ", sleep=" << sleep_ms); - usleep((useconds_t)ms_to_us(sleep_ms)); - } - } - _next_div = ts_add_ms(_next_div, _ms_per_div); - } - ops = _ops_per_div; - if (_ops_delta < (int64_t)ops) { - ops -= _ops_delta; - _ops_delta = 0; - } else { - _ops_delta -= ops; - ops = 0; - } - *op_limit = ops; - _ops_prev = ops; - DEBUG_CAPTURE(_runner, ", return=" << ops << std::endl); - return (0); -} - -ThreadOptions::ThreadOptions() : name(), throttle(0.0), throttle_burst(1.0), - _options() { - _options.add_string("name", name, "name of the thread"); - _options.add_double("throttle", throttle, - "Limit to this number of operations per second"); - _options.add_double("throttle_burst", throttle_burst, - "Changes characteristic of throttling from smooth (0.0) " - "to having large bursts with lulls (10.0 or larger)"); -} -ThreadOptions::ThreadOptions(const ThreadOptions &other) : - name(other.name), throttle(other.throttle), - throttle_burst(other.throttle_burst), _options(other._options) {} -ThreadOptions::~ThreadOptions() {} - -void -ThreadListWrapper::extend(const ThreadListWrapper &other) { - for (std::vector::const_iterator i = other._threads.begin(); - i 
!= other._threads.end(); i++) - _threads.push_back(*i); -} - -void -ThreadListWrapper::append(const Thread &t) { - _threads.push_back(t); -} - -void -ThreadListWrapper::multiply(const int n) { - if (n == 0) { - _threads.clear(); - } else { - std::vector copy(_threads); - for (int cnt = 1; cnt < n; cnt++) - extend(copy); - } -} - -Thread::Thread() : options(), _op() { -} - -Thread::Thread(const Operation &op) : options(), _op(op) { -} - -Thread::Thread(const Thread &other) : options(other.options), _op(other._op) { -} - -Thread::~Thread() { -} - -void Thread::describe(std::ostream &os) const { - os << "Thread: [" << std::endl; - _op.describe(os); os << std::endl; - os << "]"; -} - -Operation::Operation() : - _optype(OP_NONE), _table(), _key(), _value(), _transaction(NULL), - _group(NULL), _repeatgroup(0), - _keysize(0), _valuesize(0), _keymax(0), _valuemax(0) { -} - -Operation::Operation(OpType optype, Table table, Key key, Value value) : - _optype(optype), _table(table), _key(key), _value(value), - _transaction(NULL), _group(NULL), _repeatgroup(0), - _keysize(0), _valuesize(0), _keymax(0), _valuemax(0) { - size_check(); -} - -Operation::Operation(OpType optype, Table table, Key key) : - _optype(optype), _table(table), _key(key), _value(), _transaction(NULL), - _group(NULL), _repeatgroup(0), - _keysize(0), _valuesize(0), _keymax(0), _valuemax(0) { - size_check(); -} - -Operation::Operation(OpType optype, Table table) : - _optype(optype), _table(table), _key(), _value(), _transaction(NULL), - _group(NULL), _repeatgroup(0), - _keysize(0), _valuesize(0), _keymax(0), _valuemax(0) { - size_check(); -} - -Operation::Operation(const Operation &other) : - _optype(other._optype), _table(other._table), _key(other._key), - _value(other._value), _transaction(other._transaction), - _group(other._group), _repeatgroup(other._repeatgroup), - _keysize(other._keysize), _valuesize(other._valuesize), - _keymax(other._keymax), _valuemax(other._valuemax) { - // Creation and destruction 
of _group and _transaction is managed - // by Python. -} - -Operation::~Operation() { - // Creation and destruction of _group, _transaction is managed by Python. -} - -Operation& Operation::operator=(const Operation &other) { - _optype = other._optype; - _table = other._table; - _key = other._key; - _value = other._value; - _transaction = other._transaction; - _group = other._group; - _repeatgroup = other._repeatgroup; - _keysize = other._keysize; - _valuesize = other._valuesize; - _keymax = other._keymax; - _valuemax = other._valuemax; - return (*this); -} - -void Operation::describe(std::ostream &os) const { - os << "Operation: " << _optype; - if (_optype != OP_NONE) { - os << ", "; _table.describe(os); - os << ", "; _key.describe(os); - os << ", "; _value.describe(os); - } - if (_transaction != NULL) { - os << ", ["; _transaction->describe(os); os << "]"; - } - if (_group != NULL) { - os << ", group[" << _repeatgroup << "]: {"; - bool first = true; - for (std::vector::const_iterator i = _group->begin(); - i != _group->end(); i++) { - if (!first) - os << "}, {"; - i->describe(os); - first = false; - } - os << "}"; - } -} - -void Operation::get_static_counts(Stats &stats, int multiplier) { - switch (_optype) { - case OP_NONE: - break; - case OP_INSERT: - stats.insert.ops += multiplier; - break; - case OP_REMOVE: - stats.remove.ops += multiplier; - break; - case OP_SEARCH: - stats.read.ops += multiplier; - break; - case OP_UPDATE: - stats.update.ops += multiplier; - break; - default: - ASSERT(false); - } - if (_group != NULL) - for (std::vector::iterator i = _group->begin(); - i != _group->end(); i++) - i->get_static_counts(stats, multiplier * _repeatgroup); -} - -void Operation::kv_compute_max(bool iskey) { - uint64_t max; - int size; - - size = iskey ? _key._size : _value._size; - if (size == 0) - size = iskey ? 
_table.options.key_size : _table.options.value_size; - - if (iskey && size < 2) - THROW("Key.size too small for table '" << _table._uri << "'"); - if (!iskey && size < 1) - THROW("Value.size too small for table '" << _table._uri << "'"); - - if (size > 1) - max = power64(10, (size - 1)) - 1; - else - max = 0; - - if (iskey) { - _keysize = size; - _keymax = max; - } else { - _valuesize = size; - _valuemax = max; - } -} - -void Operation::kv_size_buffer(bool iskey, size_t &maxsize) const { - if (iskey) { - if ((size_t)_keysize > maxsize) - maxsize = _keysize; - } else { - if ((size_t)_valuesize > maxsize) - maxsize = _valuesize; - } -} - -void Operation::kv_gen(bool iskey, uint64_t n, char *result) const { - uint64_t max; - int size; - - size = iskey ? _keysize : _valuesize; - max = iskey ? _keymax : _valuemax; - if (n > max) - THROW((iskey ? "Key" : "Value") << " (" << n - << ") too large for size (" << size << ")"); - workgen_u64_to_string_zf(n, result, size); -} - -void Operation::size_check() const { - if (_optype != OP_NONE && _key._size == 0 && _table.options.key_size == 0) - THROW("operation requires a key size"); - if (OP_HAS_VALUE(this) && _value._size == 0 && - _table.options.value_size == 0) - THROW("operation requires a value size"); -} - -Track::Track(bool latency_tracking) : ops(0), latency_ops(0), latency(0), - min_latency(0), max_latency(0), us(NULL), ms(NULL), sec(NULL) { - track_latency(latency_tracking); -} - -Track::Track(const Track &other) : ops(other.ops), - latency_ops(other.latency_ops), latency(other.latency), - min_latency(other.min_latency), max_latency(other.max_latency), - us(NULL), ms(NULL), sec(NULL) { - if (other.us != NULL) { - us = new uint32_t[LATENCY_US_BUCKETS]; - ms = new uint32_t[LATENCY_MS_BUCKETS]; - sec = new uint32_t[LATENCY_SEC_BUCKETS]; - memcpy(us, other.us, sizeof(uint32_t) * LATENCY_US_BUCKETS); - memcpy(ms, other.ms, sizeof(uint32_t) * LATENCY_MS_BUCKETS); - memcpy(sec, other.sec, sizeof(uint32_t) * 
LATENCY_SEC_BUCKETS); - } -} - -Track::~Track() { - if (us != NULL) { - delete us; - delete ms; - delete sec; - } -} - -void Track::add(Track &other, bool reset) { - ops += other.ops; - latency_ops += other.latency_ops; - latency += other.latency; - - min_latency = MIN(min_latency, other.min_latency); - if (reset) - other.min_latency = 0; - max_latency = MAX(max_latency, other.max_latency); - if (reset) - other.max_latency = 0; - - if (us != NULL && other.us != NULL) { - for (int i = 0; i < LATENCY_US_BUCKETS; i++) - us[i] += other.us[i]; - for (int i = 0; i < LATENCY_MS_BUCKETS; i++) - ms[i] += other.ms[i]; - for (int i = 0; i < LATENCY_SEC_BUCKETS; i++) - sec[i] += other.sec[i]; - } -} - -void Track::assign(const Track &other) { - ops = other.ops; - latency_ops = other.latency_ops; - latency = other.latency; - min_latency = other.min_latency; - max_latency = other.max_latency; - - if (other.us == NULL && us != NULL) { - delete us; - delete ms; - delete sec; - us = NULL; - ms = NULL; - sec = NULL; - } - else if (other.us != NULL && us == NULL) { - us = new uint32_t[LATENCY_US_BUCKETS]; - ms = new uint32_t[LATENCY_MS_BUCKETS]; - sec = new uint32_t[LATENCY_SEC_BUCKETS]; - } - if (us != NULL) { - memcpy(us, other.us, sizeof(uint32_t) * LATENCY_US_BUCKETS); - memcpy(ms, other.ms, sizeof(uint32_t) * LATENCY_MS_BUCKETS); - memcpy(sec, other.sec, sizeof(uint32_t) * LATENCY_SEC_BUCKETS); - } -} - -uint64_t Track::average_latency() const { - if (latency_ops == 0) - return (0); - else - return (latency / latency_ops); -} - -void Track::clear() { - ops = 0; - latency_ops = 0; - latency = 0; - min_latency = 0; - max_latency = 0; - if (us != NULL) { - memset(us, 0, sizeof(uint32_t) * LATENCY_US_BUCKETS); - memset(ms, 0, sizeof(uint32_t) * LATENCY_MS_BUCKETS); - memset(sec, 0, sizeof(uint32_t) * LATENCY_SEC_BUCKETS); - } -} - -void Track::incr() { - ops++; -} - -void Track::incr_with_latency(uint64_t usecs) { - ASSERT(us != NULL); - - ops++; - latency_ops++; - latency += usecs; 
- if (usecs > max_latency) - max_latency = (uint32_t)usecs; - if (usecs < min_latency) - min_latency = (uint32_t)usecs; - - // Update a latency bucket. - // First buckets: usecs from 100us to 1000us at 100us each. - if (usecs < LATENCY_US_BUCKETS) - us[usecs]++; - - // Second buckets: milliseconds from 1ms to 1000ms, at 1ms each. - else if (usecs < ms_to_us(LATENCY_MS_BUCKETS)) - ms[us_to_ms(usecs)]++; - - // Third buckets are seconds from 1s to 100s, at 1s each. - else if (usecs < sec_to_us(LATENCY_SEC_BUCKETS)) - sec[us_to_sec(usecs)]++; - - // >100 seconds, accumulate in the biggest bucket. */ - else - sec[LATENCY_SEC_BUCKETS - 1]++; -} - -void Track::subtract(const Track &other) { - ops -= other.ops; - latency_ops -= other.latency_ops; - latency -= other.latency; - - // There's no sensible thing to be done for min/max_latency. - - if (us != NULL && other.us != NULL) { - for (int i = 0; i < LATENCY_US_BUCKETS; i++) - us[i] -= other.us[i]; - for (int i = 0; i < LATENCY_MS_BUCKETS; i++) - ms[i] -= other.ms[i]; - for (int i = 0; i < LATENCY_SEC_BUCKETS; i++) - sec[i] -= other.sec[i]; - } -} - -// If there are no entries in this Track, take them from -// a previous Track. Used to smooth graphs. We don't worry -// about latency buckets here. 
-void Track::smooth(const Track &other) { - if (latency_ops == 0) { - ops = other.ops; - latency = other.latency; - latency_ops = other.latency_ops; - min_latency = other.min_latency; - max_latency = other.max_latency; - } -} - -void Track::track_latency(bool newval) { - if (newval) { - if (us == NULL) { - us = new uint32_t[LATENCY_US_BUCKETS]; - ms = new uint32_t[LATENCY_MS_BUCKETS]; - sec = new uint32_t[LATENCY_SEC_BUCKETS]; - memset(us, 0, sizeof(uint32_t) * LATENCY_US_BUCKETS); - memset(ms, 0, sizeof(uint32_t) * LATENCY_MS_BUCKETS); - memset(sec, 0, sizeof(uint32_t) * LATENCY_SEC_BUCKETS); - } - } else { - if (us != NULL) { - delete us; - delete ms; - delete sec; - us = NULL; - ms = NULL; - sec = NULL; - } - } -} - -void Track::_get_us(long *result) { - if (us != NULL) { - for (int i = 0; i < LATENCY_US_BUCKETS; i++) - result[i] = (long)us[i]; - } else - memset(result, 0, sizeof(long) * LATENCY_US_BUCKETS); -} -void Track::_get_ms(long *result) { - if (ms != NULL) { - for (int i = 0; i < LATENCY_MS_BUCKETS; i++) - result[i] = (long)ms[i]; - } else - memset(result, 0, sizeof(long) * LATENCY_MS_BUCKETS); -} -void Track::_get_sec(long *result) { - if (sec != NULL) { - for (int i = 0; i < LATENCY_SEC_BUCKETS; i++) - result[i] = (long)sec[i]; - } else - memset(result, 0, sizeof(long) * LATENCY_SEC_BUCKETS); -} - -Stats::Stats(bool latency) : insert(latency), not_found(latency), - read(latency), remove(latency), update(latency), truncate(latency) { -} - -Stats::Stats(const Stats &other) : insert(other.insert), - not_found(other.not_found), read(other.read), remove(other.remove), - update(other.update), truncate(other.truncate) { -} - -Stats::~Stats() {} - -void Stats::add(Stats &other, bool reset) { - insert.add(other.insert, reset); - not_found.add(other.not_found, reset); - read.add(other.read, reset); - remove.add(other.remove, reset); - update.add(other.update, reset); - truncate.add(other.truncate, reset); -} - -void Stats::assign(const Stats &other) { - 
insert.assign(other.insert); - not_found.assign(other.not_found); - read.assign(other.read); - remove.assign(other.remove); - update.assign(other.update); - truncate.assign(other.truncate); -} - -void Stats::clear() { - insert.clear(); - not_found.clear(); - read.clear(); - remove.clear(); - update.clear(); - truncate.clear(); -} - -void Stats::describe(std::ostream &os) const { - os << "Stats: reads " << read.ops; - if (not_found.ops > 0) { - os << " (" << not_found.ops << " not found)"; - } - os << ", inserts " << insert.ops; - os << ", updates " << update.ops; - os << ", truncates " << truncate.ops; - os << ", removes " << remove.ops; -} - -void Stats::final_report(std::ostream &os, timespec &totalsecs) const { - uint64_t ops = 0; - ops += read.ops; - ops += not_found.ops; - ops += insert.ops; - ops += update.ops; - ops += truncate.ops; - ops += remove.ops; - -#define FINAL_OUTPUT(os, field, singular, ops, totalsecs) \ - os << "Executed " << field << " " #singular " operations (" \ - << PCT(field, ops) << "%) " << OPS_PER_SEC(field, totalsecs) \ - << " ops/sec" << std::endl - - FINAL_OUTPUT(os, read.ops, read, ops, totalsecs); - FINAL_OUTPUT(os, not_found.ops, not found, ops, totalsecs); - FINAL_OUTPUT(os, insert.ops, insert, ops, totalsecs); - FINAL_OUTPUT(os, update.ops, update, ops, totalsecs); - FINAL_OUTPUT(os, truncate.ops, truncate, ops, totalsecs); - FINAL_OUTPUT(os, remove.ops, remove, ops, totalsecs); -} - -void Stats::report(std::ostream &os) const { - os << read.ops << " reads"; - if (not_found.ops > 0) { - os << " (" << not_found.ops << " not found)"; - } - os << ", " << insert.ops << " inserts, "; - os << update.ops << " updates, "; - os << truncate.ops << " truncates, "; - os << remove.ops << " removes"; -} - -void Stats::smooth(const Stats &other) { - insert.smooth(other.insert); - not_found.smooth(other.not_found); - read.smooth(other.read); - remove.smooth(other.remove); - update.smooth(other.update); - truncate.smooth(other.truncate); -} - 
-void Stats::subtract(const Stats &other) { - insert.subtract(other.insert); - not_found.subtract(other.not_found); - read.subtract(other.read); - remove.subtract(other.remove); - update.subtract(other.update); - truncate.subtract(other.truncate); -} - -void Stats::track_latency(bool latency) { - insert.track_latency(latency); - not_found.track_latency(latency); - read.track_latency(latency); - remove.track_latency(latency); - update.track_latency(latency); - truncate.track_latency(latency); -} - -TableOptions::TableOptions() : key_size(0), value_size(0), _options() { - _options.add_int("key_size", key_size, - "default size of the key, unless overridden by Key.size"); - _options.add_int("value_size", value_size, - "default size of the value, unless overridden by Value.size"); -} -TableOptions::TableOptions(const TableOptions &other) : - key_size(other.key_size), value_size(other.value_size), - _options(other._options) {} -TableOptions::~TableOptions() {} - -Table::Table() : options(), _uri(), _internal(new TableInternal()) { -} -Table::Table(const char *uri) : options(), _uri(uri), - _internal(new TableInternal()) { -} -Table::Table(const Table &other) : options(other.options), _uri(other._uri), - _internal(new TableInternal(*other._internal)) { -} -Table::~Table() { delete _internal; } -Table& Table::operator=(const Table &other) { - options = other.options; - _uri = other._uri; - *_internal = *other._internal; - return (*this); -} - -void Table::describe(std::ostream &os) const { - os << "Table: " << _uri; -} - -TableInternal::TableInternal() : _tint(0), _context_count(0) {} -TableInternal::TableInternal(const TableInternal &other) : _tint(other._tint), - _context_count(other._context_count) {} -TableInternal::~TableInternal() {} - -WorkloadOptions::WorkloadOptions() : max_latency(0), - report_file("workload.stat"), report_interval(0), - run_time(0), sample_interval(0), sample_rate(1), - _options() { - _options.add_int("max_latency", max_latency, - "prints 
warning if any latency measured exceeds this number of " - "milliseconds. Requires sample_interval to be configured."); - _options.add_int("report_interval", report_interval, - "output throughput information every interval seconds, 0 to disable"); - _options.add_string("report_file", report_file, - "file name for collecting run output, " - "including output from the report_interval option. " - "The file name is relative to the connection's home directory. " - "When set to the empty string, stdout is used."); - _options.add_int("run_time", run_time, "total workload seconds"); - _options.add_int("sample_interval", sample_interval, - "performance logging every interval seconds, 0 to disable"); - _options.add_int("sample_rate", sample_rate, - "how often the latency of operations is measured. 1 for every operation, " - "2 for every second operation, 3 for every third operation etc."); -} - -WorkloadOptions::WorkloadOptions(const WorkloadOptions &other) : - max_latency(other.max_latency), report_interval(other.report_interval), - run_time(other.run_time), sample_interval(other.sample_interval), - sample_rate(other.sample_rate), _options(other._options) {} -WorkloadOptions::~WorkloadOptions() {} - -Workload::Workload(Context *context, const ThreadListWrapper &tlw) : - options(), stats(), _context(context), _threads(tlw._threads) { - if (context == NULL) - THROW("Workload contructor requires a Context"); -} - -Workload::Workload(Context *context, const Thread &thread) : - options(), stats(), _context(context), _threads() { - if (context == NULL) - THROW("Workload contructor requires a Context"); - _threads.push_back(thread); -} - -Workload::Workload(const Workload &other) : - options(other.options), stats(other.stats), _context(other._context), - _threads(other._threads) {} -Workload::~Workload() {} - -Workload& Workload::operator=(const Workload &other) { - options = other.options; - stats.assign(other.stats); - *_context = *other._context; - _threads = other._threads; - 
return (*this); -} - -int Workload::run(WT_CONNECTION *conn) { - WorkloadRunner runner(this); - - return (runner.run(conn)); -} - -WorkloadRunner::WorkloadRunner(Workload *workload) : - _workload(workload), _trunners(workload->_threads.size()), - _report_out(&std::cout), _start() { - ts_clear(_start); -} -WorkloadRunner::~WorkloadRunner() {} - -int WorkloadRunner::run(WT_CONNECTION *conn) { - WT_DECL_RET; - WorkloadOptions *options = &_workload->options; - std::ofstream report_out; - - _wt_home = conn->get_home(conn); - if (options->sample_interval > 0 && options->sample_rate <= 0) - THROW("Workload.options.sample_rate must be positive"); - if (!options->report_file.empty()) { - open_report_file(report_out, options->report_file.c_str(), - "Workload.options.report_file"); - _report_out = &report_out; - } - WT_ERR(create_all(conn, _workload->_context)); - WT_ERR(open_all()); - WT_ERR(ThreadRunner::cross_check(_trunners)); - WT_ERR(run_all()); - err: - //TODO: (void)close_all(); - _report_out = &std::cout; - return (ret); -} - -int WorkloadRunner::open_all() { - for (size_t i = 0; i < _trunners.size(); i++) { - WT_RET(_trunners[i].open_all()); - } - return (0); -} - -void WorkloadRunner::open_report_file(std::ofstream &of, const char *filename, - const char *desc) { - std::stringstream sstm; - - if (!_wt_home.empty()) - sstm << _wt_home << "/"; - sstm << filename; - of.open(sstm.str().c_str(), std::fstream::app); - if (!of) - THROW_ERRNO(errno, desc << ": \"" << sstm.str() - << "\" could not be opened"); -} - -int WorkloadRunner::create_all(WT_CONNECTION *conn, Context *context) { - for (size_t i = 0; i < _trunners.size(); i++) { - ThreadRunner *runner = &_trunners[i]; - std::stringstream sstm; - Thread *thread = &_workload->_threads[i]; - if (thread->options.name.empty()) { - sstm << "thread" << i; - thread->options.name = sstm.str(); - } - runner->_thread = thread; - runner->_context = context; - runner->_icontext = context->_internal; - runner->_workload = 
_workload; - runner->_wrunner = this; - runner->_number = (uint32_t)i; - // TODO: recover from partial failure here - WT_RET(runner->create_all(conn)); - } - WT_RET(context->_internal->create_all()); - return (0); -} - -int WorkloadRunner::close_all() { - for (size_t i = 0; i < _trunners.size(); i++) - _trunners[i].close_all(); - - return (0); -} - -void WorkloadRunner::get_stats(Stats *result) { - for (size_t i = 0; i < _trunners.size(); i++) - result->add(_trunners[i]._stats); -} - -void WorkloadRunner::report(time_t interval, time_t totalsecs, - Stats *prev_totals) { - std::ostream &out = *_report_out; - Stats new_totals(prev_totals->track_latency()); - - get_stats(&new_totals); - Stats diff(new_totals); - diff.subtract(*prev_totals); - prev_totals->assign(new_totals); - diff.report(out); - out << " in " << interval << " secs (" - << totalsecs << " total secs)" << std::endl; -} - -void WorkloadRunner::final_report(timespec &totalsecs) { - std::ostream &out = *_report_out; - Stats *stats = &_workload->stats; - - stats->clear(); - stats->track_latency(_workload->options.sample_interval > 0); - - get_stats(stats); - stats->final_report(out, totalsecs); - out << "Run completed: " << totalsecs << " seconds" << std::endl; -} - -int WorkloadRunner::run_all() { - void *status; - std::vector thread_handles; - Stats counts(false); - WorkgenException *exception; - WorkloadOptions *options = &_workload->options; - Monitor monitor(*this); - std::ofstream monitor_out; - std::ostream &out = *_report_out; - WT_DECL_RET; - - for (size_t i = 0; i < _trunners.size(); i++) - _trunners[i].get_static_counts(counts); - out << "Starting workload: " << _trunners.size() << " threads, "; - counts.report(out); - out << std::endl; - - workgen_epoch(&_start); - timespec end = _start + options->run_time; - timespec next_report = _start + options->report_interval; - - // Start all threads - if (options->sample_interval > 0) { - open_report_file(monitor_out, "monitor", "monitor output file"); - 
monitor._out = &monitor_out; - - if ((ret = pthread_create(&monitor._handle, NULL, monitor_main, - &monitor)) != 0) { - std::cerr << "monitor thread failed err=" << ret << std::endl; - return (ret); - } - } - - for (size_t i = 0; i < _trunners.size(); i++) { - pthread_t thandle; - ThreadRunner *runner = &_trunners[i]; - runner->_stop = false; - runner->_repeat = (options->run_time != 0); - if ((ret = pthread_create(&thandle, NULL, thread_runner_main, - runner)) != 0) { - std::cerr << "pthread_create failed err=" << ret << std::endl; - std::cerr << "Stopping all threads." << std::endl; - for (size_t j = 0; j < thread_handles.size(); j++) { - _trunners[j]._stop = true; - (void)pthread_join(thread_handles[j], &status); - _trunners[j].close_all(); - } - return (ret); - } - thread_handles.push_back(thandle); - runner->_stats.clear(); - } - - // Let the test run, reporting as needed. - Stats curstats(false); - timespec now = _start; - while (now < end) { - timespec sleep_amt; - - sleep_amt = end - now; - if (next_report != 0) { - timespec next_diff = next_report - now; - if (next_diff < next_report) - sleep_amt = next_diff; - } - if (sleep_amt.tv_sec > 0) - sleep((unsigned int)sleep_amt.tv_sec); - else - usleep((useconds_t)((sleep_amt.tv_nsec + 999)/ 1000)); - - workgen_epoch(&now); - if (now >= next_report && now < end && options->report_interval != 0) { - report(options->report_interval, (now - _start).tv_sec, &curstats); - while (now >= next_report) - next_report += options->report_interval; - } - } - - // signal all threads to stop - if (options->run_time != 0) - for (size_t i = 0; i < _trunners.size(); i++) - _trunners[i]._stop = true; - if (options->sample_interval > 0) - monitor._stop = true; - - // wait for all threads - exception = NULL; - for (size_t i = 0; i < _trunners.size(); i++) { - WT_TRET(pthread_join(thread_handles[i], &status)); - if (_trunners[i]._errno != 0) - VERBOSE(_trunners[i], - "Thread " << i << " has errno " << _trunners[i]._errno); - 
WT_TRET(_trunners[i]._errno); - _trunners[i].close_all(); - if (exception == NULL && !_trunners[i]._exception._str.empty()) - exception = &_trunners[i]._exception; - } - if (options->sample_interval > 0) { - WT_TRET(pthread_join(monitor._handle, &status)); - if (monitor._errno != 0) - std::cerr << "Monitor thread has errno " << monitor._errno - << std::endl; - if (exception == NULL && !monitor._exception._str.empty()) - exception = &monitor._exception; - } - - // issue the final report - timespec finalsecs = now - _start; - final_report(finalsecs); - - if (ret != 0) - std::cerr << "run_all failed err=" << ret << std::endl; - (*_report_out) << std::endl; - if (exception != NULL) - throw *exception; - return (ret); -} - -}; diff --git a/bench/workgen/workgen.h b/bench/workgen/workgen.h deleted file mode 100644 index c1ae01ed5a4..00000000000 --- a/bench/workgen/workgen.h +++ /dev/null @@ -1,410 +0,0 @@ -/*- - * Public Domain 2014-2017 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. - * - * This is free and unencumbered software released into the public domain. - * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. - * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#include -#include -#include -#include - -namespace workgen { - -struct ContextInternal; -struct TableInternal; -struct Thread; -struct Transaction; - -#ifndef SWIG -struct OptionsList { - OptionsList(); - OptionsList(const OptionsList &other); - - void add_int(const char *name, int default_value, const char *desc); - void add_bool(const char *name, bool default_value, const char *desc); - void add_double(const char *name, double default_value, const char *desc); - void add_string(const char *name, const std::string &default_value, - const char *desc); - - std::string help() const; - std::string help_description(const char *option_name) const; - std::string help_type(const char *option_name) const; - -private: - void add_option(const char *name, const std::string typestr, - const char *desc); - typedef std::pair TypeDescPair; - std::map _option_map; -}; -#endif - -// These classes are all exposed to Python via SWIG. While they may contain -// data that is private to C++, such data must not prevent the objects from -// being shared. Tables, Keys, Values, Operations and Threads can be shared: a -// single Key object might appear in many operations; Operations may appear -// multiple times in a Thread or in different Threads; the same Thread may -// appear multiple times in a Workload list, etc. -// -// Certain kinds of state are allowed: A Table contains a unique pointer that -// is used within the internal part of the Context. Stats contain lots -// of state, but is made available after a Workload.run(). -// -// Python controls the lifetime of (nearly) all objects of these classes. -// The exception is Stat/Track objects, which are also created/used -// internally to calculate and show statistics during a run. 
-// -struct Track { - // Threads maintain the total thread operation and total latency they've - // experienced. - - uint64_t ops; // Total operations */ - uint64_t latency_ops; // Total ops sampled for latency - uint64_t latency; // Total latency */ - - // Minimum/maximum latency, shared with the monitor thread, that is, the - // monitor thread clears it so it's recalculated again for each period. - - uint32_t min_latency; // Minimum latency (uS) - uint32_t max_latency; // Maximum latency (uS) - - Track(bool latency_tracking = false); - Track(const Track &other); - ~Track(); - - void add(Track&, bool reset = false); - void assign(const Track&); - uint64_t average_latency() const; - void clear(); - void incr(); - void incr_with_latency(uint64_t usecs); - void smooth(const Track&); - void subtract(const Track&); - void track_latency(bool); - bool track_latency() const { return (us != NULL); } - - void _get_us(long *); - void _get_ms(long *); - void _get_sec(long *); - -private: - // Latency buckets. From python, accessed via methods us(), ms(), sec() - uint32_t *us; // < 1us ... 1000us - uint32_t *ms; // < 1ms ... 1000ms - uint32_t *sec; // < 1s 2s ... 
100s - - Track & operator=(const Track &other); // use explicit assign method -}; - -struct Stats { - Track insert; - Track not_found; - Track read; - Track remove; - Track update; - Track truncate; - - Stats(bool latency = false); - Stats(const Stats &other); - ~Stats(); - - void add(Stats&, bool reset = false); - void assign(const Stats&); - void clear(); - void describe(std::ostream &os) const; -#ifndef SWIG - void final_report(std::ostream &os, timespec &totalsecs) const; - void report(std::ostream &os) const; -#endif - void smooth(const Stats&); - void subtract(const Stats&); - void track_latency(bool); - bool track_latency() const { return (insert.track_latency()); } - -private: - Stats & operator=(const Stats &other); // use explicit assign method -}; - -// A Context tracks the current record number for each uri, used -// for key generation. -// -struct Context { - bool _verbose; - ContextInternal *_internal; - - Context(); - ~Context(); - void describe(std::ostream &os) const { - os << "Context: verbose " << (_verbose ? "true" : "false"); - } - -#ifndef SWIG - Context& operator=(const Context &other); -#endif -}; - -// To prevent silent errors, this class is set up in Python so that new -// properties are prevented, only existing properties can be set. 
-// -struct TableOptions { - int key_size; - int value_size; - - TableOptions(); - TableOptions(const TableOptions &other); - ~TableOptions(); - - void describe(std::ostream &os) const { - os << "key_size " << key_size; - os << ", value_size " << value_size; - } - - std::string help() const { return _options.help(); } - std::string help_description(const char *option_name) const { - return _options.help_description(option_name); } - std::string help_type(const char *option_name) const { - return _options.help_type(option_name); } - -private: - OptionsList _options; -}; - -struct Table { - TableOptions options; - std::string _uri; - TableInternal *_internal; - - /* XXX select table from range */ - - Table(); - Table(const char *tablename); - Table(const Table &other); - ~Table(); - - void describe(std::ostream &os) const; - -#ifndef SWIG - Table& operator=(const Table &other); -#endif -}; - -struct Key { - typedef enum { - KEYGEN_AUTO, KEYGEN_APPEND, KEYGEN_PARETO, KEYGEN_UNIFORM } KeyType; - KeyType _keytype; - int _size; - - /* XXX specify more about key distribution */ - Key() : _keytype(KEYGEN_AUTO), _size(0) {} - Key(KeyType keytype, int size) : _keytype(keytype), _size(size) {} - Key(const Key &other) : _keytype(other._keytype), _size(other._size) {} - ~Key() {} - - void describe(std::ostream &os) const { - os << "Key: type " << _keytype << ", size " << _size; } -}; - -struct Value { - int _size; - - /* XXX specify how value is calculated */ - Value() : _size(0) {} - Value(int size) : _size(size) {} - Value(const Value &other) : _size(other._size) {} - ~Value() {} - - void describe(std::ostream &os) const { os << "Value: size " << _size; } -}; - -struct Operation { - enum OpType { - OP_NONE, OP_INSERT, OP_REMOVE, OP_SEARCH, OP_UPDATE }; - OpType _optype; - - Table _table; - Key _key; - Value _value; - Transaction *_transaction; - std::vector *_group; - int _repeatgroup; - -#ifndef SWIG - int _keysize; // derived from Key._size and Table.options.key_size - int 
_valuesize; - uint64_t _keymax; - uint64_t _valuemax; -#endif - - Operation(); - Operation(OpType optype, Table table, Key key, Value value); - Operation(OpType optype, Table table, Key key); - Operation(OpType optype, Table table); - Operation(const Operation &other); - ~Operation(); - - void describe(std::ostream &os) const; -#ifndef SWIG - Operation& operator=(const Operation &other); - void get_static_counts(Stats &stats, int multiplier); - void kv_compute_max(bool); - void kv_gen(bool, uint64_t, char *) const; - void kv_size_buffer(bool iskey, size_t &size) const; - void size_check() const; -#endif -}; - -// To prevent silent errors, this class is set up in Python so that new -// properties are prevented, only existing properties can be set. -// -struct ThreadOptions { - std::string name; - double throttle; - double throttle_burst; - - ThreadOptions(); - ThreadOptions(const ThreadOptions &other); - ~ThreadOptions(); - - void describe(std::ostream &os) const { - os << "throttle " << throttle; - } - - std::string help() const { return _options.help(); } - std::string help_description(const char *option_name) const { - return _options.help_description(option_name); } - std::string help_type(const char *option_name) const { - return _options.help_type(option_name); } - -private: - OptionsList _options; -}; - -// This is a list of threads, which may be used in the Workload constructor. -// It participates with ThreadList defined on the SWIG/Python side and -// some Python operators added to Thread to allow Threads to be easily -// composed using '+' and multiplied (by integer counts) using '*'. -// Users of the workgen API in Python don't ever need to use -// ThreadListWrapper or ThreadList. 
-struct ThreadListWrapper { - std::vector _threads; - - ThreadListWrapper() : _threads() {} - ThreadListWrapper(const ThreadListWrapper &other) : - _threads(other._threads) {} - ThreadListWrapper(const std::vector &threads) : _threads(threads) {} - void extend(const ThreadListWrapper &); - void append(const Thread &); - void multiply(const int); -}; - -struct Thread { - ThreadOptions options; - Operation _op; - - Thread(); - Thread(const Operation &op); - Thread(const Thread &other); - ~Thread(); - - void describe(std::ostream &os) const; -}; - -struct Transaction { - bool _rollback; - std::string _begin_config; - std::string _commit_config; - - Transaction(const char *_config = NULL) : _rollback(false), - _begin_config(_config == NULL ? "" : _config), _commit_config() {} - - void describe(std::ostream &os) const { - os << "Transaction: "; - if (_rollback) - os << "(rollback) "; - os << "begin_config: " << _begin_config; - if (!_commit_config.empty()) - os << ", commit_config: " << _commit_config; - } -}; - -// To prevent silent errors, this class is set up in Python so that new -// properties are prevented, only existing properties can be set. 
-// -struct WorkloadOptions { - int max_latency; - std::string report_file; - int report_interval; - int run_time; - int sample_interval; - int sample_rate; - - WorkloadOptions(); - WorkloadOptions(const WorkloadOptions &other); - ~WorkloadOptions(); - - void describe(std::ostream &os) const { - os << "run_time " << run_time; - os << ", report_interval " << report_interval; - } - - std::string help() const { return _options.help(); } - std::string help_description(const char *option_name) const { - return _options.help_description(option_name); } - std::string help_type(const char *option_name) const { - return _options.help_type(option_name); } - -private: - OptionsList _options; -}; - -struct Workload { - WorkloadOptions options; - Stats stats; - Context *_context; - std::vector _threads; - - Workload(Context *context, const ThreadListWrapper &threadlist); - Workload(Context *context, const Thread &thread); - Workload(const Workload &other); - ~Workload(); - -#ifndef SWIG - Workload& operator=(const Workload &other); -#endif - - void describe(std::ostream &os) const { - os << "Workload: "; - _context->describe(os); - os << ", "; - options.describe(os); - os << ", [" << std::endl; - for (std::vector::const_iterator i = _threads.begin(); i != _threads.end(); i++) { - os << " "; i->describe(os); os << std::endl; - } - os << "]"; - } - int run(WT_CONNECTION *conn); -}; - -}; diff --git a/bench/workgen/workgen.swig b/bench/workgen/workgen.swig deleted file mode 100644 index 0f74942169c..00000000000 --- a/bench/workgen/workgen.swig +++ /dev/null @@ -1,233 +0,0 @@ -/*- - * Public Domain 2014-2017 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. - * - * This is free and unencumbered software released into the public domain. - * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. 
- * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * workgen.swig - * The SWIG interface file defining the workgen python API. - */ - -%include "typemaps.i" -%include "std_vector.i" -%include "std_string.i" -%include "stdint.i" -%include "attribute.i" -%include "carrays.i" - -/* We only need to reference WiredTiger types. */ -%import "wiredtiger.h" - -%{ -#include -#include -#include -#include "wiredtiger.h" -#include "workgen.h" -#include "workgen_int.h" -%} - -%pythoncode %{ -import numbers -%} - -%exception { - try { - $action - } - catch (workgen::WorkgenException &wge) { - SWIG_exception_fail(SWIG_RuntimeError, wge._str.c_str()); - } -} - -/* - * Some functions are long running, turn off signal handling that was enabled - * by the Python interpreter. This means that a signal handler coded in Python - * won't work when spanning a call to one of these long running functions, but - * it's doubtful our test scripts need signals at all. This could be made to - * work, it's just not worth the trouble. 
- */ -%define InterruptableFunction(funcname) -%exception funcname { - try { - void (*savesig)(int) = signal(SIGINT, SIG_DFL); - $action - (void)signal(SIGINT, savesig); - } - catch (workgen::WorkgenException &wge) { - SWIG_exception_fail(SWIG_RuntimeError, wge._str.c_str()); - } -} -%enddef - -/* - * Define a __str__ function for all public workgen classes. - */ -%define WorkgenClass(classname) -%extend workgen::classname { - const std::string __str__() { - std::ostringstream out; - $self->describe(out); - return out.str(); - } -}; -%enddef - -/* - * To forestall errors, make it impossible to add new attributes to certain - * classes. This trick relies on the implementation of SWIG providing - * predictably named functions in the _workgen namespace to set attributes. - */ -%define WorkgenFrozenClass(classname) -%extend workgen::classname { -%pythoncode %{ - def __setattr__(self, attr, val): - if getattr(self, attr) == None: - raise AttributeError("'" + #classname + - "' object has no attribute '" + attr + "'") - f = _workgen.__dict__[#classname + '_' + attr + '_set'] - f(self, val) -%} -}; -%enddef - -InterruptableFunction(workgen::execute) -InterruptableFunction(workgen::Workload::run) - -%module workgen -/* Parse the header to generate wrappers. 
*/ -%include "workgen.h" - -%template(OpList) std::vector; -%template(ThreadList) std::vector; -%array_class(uint32_t, uint32Array); -%array_class(long, longArray); - -WorkgenClass(Key) -WorkgenClass(Operation) -WorkgenClass(Stats) -WorkgenClass(Table) -WorkgenClass(TableOptions) -WorkgenClass(Thread) -WorkgenClass(ThreadOptions) -WorkgenClass(Transaction) -WorkgenClass(Value) -WorkgenClass(Workload) -WorkgenClass(WorkloadOptions) -WorkgenClass(Context) - -WorkgenFrozenClass(TableOptions) -WorkgenFrozenClass(ThreadOptions) -WorkgenFrozenClass(WorkloadOptions) - -%extend workgen::Operation { -%pythoncode %{ - def __mul__(self, other): - if not isinstance(other, numbers.Integral): - raise Exception('Operation.__mul__ requires an integral number') - op = Operation() - op._group = OpList([self]) - op._repeatgroup = other - return op - - __rmul__ = __mul__ - - def __add__(self, other): - if not isinstance(other, Operation): - raise Exception('Operation.__sum__ requires an Operation') - if self._group == None or self._repeatgroup != 1 or self._transaction != None: - op = Operation() - op._group = OpList([self, other]) - op._repeatgroup = 1 - return op - else: - self._group.append(other) - return self -%} -}; - -%extend workgen::Thread { -%pythoncode %{ - def __mul__(self, other): - if not isinstance(other, numbers.Integral): - raise Exception('Thread.__mul__ requires an integral number') - return ThreadListWrapper(ThreadList([self] * other)) - - __rmul__ = __mul__ - - def __add__(self, other): - if type(self) != type(other): - raise Exception('Thread.__sum__ requires an Thread') - return ThreadListWrapper(ThreadList([self, other])) -%} -}; - -%extend workgen::ThreadListWrapper { -%pythoncode %{ - def __mul__(self, other): - if not isinstance(other, numbers.Integral): - raise Exception('ThreadList.__mul__ requires an integral number') - tlw = ThreadListWrapper(self) - tlw.multiply(other) - return tlw - - __rmul__ = __mul__ - - def __add__(self, other): - tlw = 
ThreadListWrapper(self) - if isinstance(other, ThreadListWrapper): - tlw.extend(other) - elif isinstance(other, Thread): - tlw.append(other) - else: - raise Exception('ThreadList.__sum__ requires an Thread or ThreadList') - return tlw -%} -}; - -%extend workgen::Track { -%pythoncode %{ - def __longarray(self, size): - result = longArray(size) - result.__len__ = lambda: size - return result - - def us(self): - result = self.__longarray(1000) - self._get_us(result) - return result - - def ms(self): - result = self.__longarray(1000) - self._get_ms(result) - return result - - def sec(self): - result = self.__longarray(100) - self._get_sec(result) - return result -%} -}; diff --git a/bench/workgen/workgen/__init__.py b/bench/workgen/workgen/__init__.py deleted file mode 100644 index 00e8f257546..00000000000 --- a/bench/workgen/workgen/__init__.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-2016 MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# __init__.py -# initialization for workgen module -# -import os, sys - -# After importing the SWIG-generated file, copy all symbols from from it -# to this module so they will appear in the workgen namespace. -me = sys.modules[__name__] -sys.path.append(os.path.dirname(__file__)) # needed for Python3 -import workgen, workgen_util -for module in workgen: - for name in dir(module): - value = getattr(module, name) - setattr(me, name, value) diff --git a/bench/workgen/workgen_func.c b/bench/workgen/workgen_func.c deleted file mode 100644 index 6a465855875..00000000000 --- a/bench/workgen/workgen_func.c +++ /dev/null @@ -1,86 +0,0 @@ -/*- - * Public Domain 2014-2017 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. - * - * This is free and unencumbered software released into the public domain. - * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. - * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#include "wiredtiger.h" -#include "test_util.h" -#include "workgen_func.h" - -/* This is an opaque type handle. */ -typedef struct workgen_random_state {} workgen_random_state; - -/* - * These functions call their WiredTiger equivalents. - */ -uint32_t -workgen_atomic_add32(uint32_t *vp, uint32_t v) -{ - return (__wt_atomic_add32(vp, v)); -} - -uint64_t -workgen_atomic_add64(uint64_t *vp, uint64_t v) -{ - return (__wt_atomic_add64(vp, v)); -} - -void -workgen_epoch(struct timespec *tsp) -{ - __wt_epoch(NULL, tsp); -} - -uint32_t -workgen_random(workgen_random_state volatile * rnd_state) -{ - return (__wt_random((WT_RAND_STATE *)rnd_state)); -} - -int -workgen_random_alloc(WT_SESSION *session, workgen_random_state **rnd_state) -{ - WT_RAND_STATE *state; - state = malloc(sizeof(WT_RAND_STATE)); - if (state == NULL) { - *rnd_state = NULL; - return (ENOMEM); - } - __wt_random_init_seed((WT_SESSION_IMPL *)session, state); - *rnd_state = (workgen_random_state *)state; - return (0); -} - -void -workgen_random_free(workgen_random_state *rnd_state) -{ - free(rnd_state); -} - -extern void -workgen_u64_to_string_zf(uint64_t n, char *buf, size_t len) -{ - u64_to_string_zf(n, buf, len); -} diff --git a/bench/workgen/workgen_func.h b/bench/workgen/workgen_func.h deleted file mode 100644 index 20ebf2632cc..00000000000 --- a/bench/workgen/workgen_func.h +++ /dev/null @@ -1,44 +0,0 @@ -/*- - * Public Domain 2014-2017 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. - * - * This is free and unencumbered software released into the public domain. 
- * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. - * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -struct workgen_random_state; - -extern uint32_t -workgen_atomic_add32(uint32_t *vp, uint32_t v); -extern uint64_t -workgen_atomic_add64(uint64_t *vp, uint64_t v); -extern void -workgen_epoch(struct timespec *tsp); -extern uint32_t -workgen_random(struct workgen_random_state volatile *rnd_state); -extern int -workgen_random_alloc(WT_SESSION *session, - struct workgen_random_state **rnd_state); -extern void -workgen_random_free(struct workgen_random_state *rnd_state); -extern void -workgen_u64_to_string_zf(uint64_t n, char *buf, size_t len); diff --git a/bench/workgen/workgen_int.h b/bench/workgen/workgen_int.h deleted file mode 100644 index 01fb727691b..00000000000 --- a/bench/workgen/workgen_int.h +++ /dev/null @@ -1,205 +0,0 @@ -/*- - * Public Domain 2014-2017 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. 
- * - * This is free and unencumbered software released into the public domain. - * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. - * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#include -#include -#include -#include -#include -#ifndef SWIG -extern "C" { -#include "workgen_func.h" -} -#endif - -namespace workgen { - -// A 'tint' or ('table integer') is a unique small value integer -// assigned to each table URI in use. Currently, we assign it once, -// and its value persists through the lifetime of the Context. -typedef uint32_t tint_t; - -struct ThreadRunner; -struct WorkloadRunner; - -// A exception generated by the workgen classes. Methods generally return an -// int errno, so this is useful primarily for notifying the caller about -// failures in constructors. 
-struct WorkgenException { - std::string _str; - WorkgenException() : _str() {} - WorkgenException(int err, const char *msg = NULL) : _str() { - if (err != 0) - _str += wiredtiger_strerror(err); - if (msg != NULL) { - if (!_str.empty()) - _str += ": "; - _str += msg; - } - } - WorkgenException(const WorkgenException &other) : _str(other._str) {} - ~WorkgenException() {} -}; - -struct Throttle { - ThreadRunner &_runner; - double _throttle; - double _burst; - timespec _next_div; - int64_t _ops_delta; - uint64_t _ops_prev; // previously returned value - uint64_t _ops_per_div; // statically calculated. - uint64_t _ms_per_div; // statically calculated. - bool _started; - - Throttle(ThreadRunner &runner, double throttle, double burst); - ~Throttle(); - - // Called with the number of operations since the last throttle. - // Sleeps for any needed amount and returns the number operations the - // caller should perform before the next call to throttle. - int throttle(uint64_t op_count, uint64_t *op_limit); -}; - -// There is one of these per Thread object. It exists for the duration of a -// call to Workload::run() method. 
-struct ThreadRunner { - int _errno; - WorkgenException _exception; - Thread *_thread; - Context *_context; - ContextInternal *_icontext; - Workload *_workload; - WorkloadRunner *_wrunner; - workgen_random_state *_rand_state; - Throttle *_throttle; - uint64_t _throttle_ops; - uint64_t _throttle_limit; - bool _in_transaction; - uint32_t _number; - Stats _stats; - - typedef enum { - USAGE_READ = 0x1, USAGE_WRITE = 0x2, USAGE_MIXED = 0x4 } Usage; - std::map _table_usage; // value is Usage - WT_CURSOR **_cursors; // indexed by tint_t - volatile bool _stop; - WT_SESSION *_session; - char *_keybuf; - char *_valuebuf; - bool _repeat; - - ThreadRunner(); - ~ThreadRunner(); - - void free_all(); - static int cross_check(std::vector &runners); - - int close_all(); - int create_all(WT_CONNECTION *conn); - void get_static_counts(Stats &); - int open_all(); - int run(); - - void op_create_all(Operation *, size_t &keysize, size_t &valuesize); - uint64_t op_get_key_recno(Operation *, tint_t tint); - void op_get_static_counts(Operation *, Stats &, int); - int op_run(Operation *); - -#ifdef _DEBUG - std::stringstream _debug_messages; - std::string get_debug(); -#define DEBUG_CAPTURE(runner, expr) runner._debug_messages << expr -#else -#define DEBUG_CAPTURE(runner, expr) -#endif -}; - -struct Monitor { - int _errno; - WorkgenException _exception; - WorkloadRunner &_wrunner; - volatile bool _stop; - pthread_t _handle; - std::ostream *_out; - - Monitor(WorkloadRunner &wrunner); - ~Monitor(); - int run(); -}; - -struct ContextInternal { - std::map _tint; // maps uri -> tint_t - std::map _table_names; // reverse mapping - uint64_t *_recno; // # entries per tint_t - uint32_t _recno_alloced; // length of allocated _recno - tint_t _tint_last; // last tint allocated - // unique id per context, to work with multiple contexts, starts at 1. 
- uint32_t _context_count; - - ContextInternal(); - ~ContextInternal(); - int create_all(); -}; - -struct TableInternal { - tint_t _tint; - uint32_t _context_count; - - TableInternal(); - TableInternal(const TableInternal &other); - ~TableInternal(); -}; - -// An instance of this class only exists for the duration of one call to a -// Workload::run() method. -struct WorkloadRunner { - Workload *_workload; - std::vector _trunners; - std::ostream *_report_out; - std::string _wt_home; - timespec _start; - - WorkloadRunner(Workload *); - ~WorkloadRunner(); - int run(WT_CONNECTION *conn); - -private: - int close_all(); - int create_all(WT_CONNECTION *conn, Context *context); - void final_report(timespec &); - void get_stats(Stats *stats); - int open_all(); - void open_report_file(std::ofstream &, const char *, const char *); - void report(time_t, time_t, Stats *stats); - int run_all(); - - WorkloadRunner(const WorkloadRunner &); // disallowed - WorkloadRunner& operator=(const WorkloadRunner &other); // disallowed -}; - -}; diff --git a/bench/workgen/workgen_time.h b/bench/workgen/workgen_time.h deleted file mode 100644 index f33eb64d9c9..00000000000 --- a/bench/workgen/workgen_time.h +++ /dev/null @@ -1,201 +0,0 @@ -/*- - * Public Domain 2014-2017 MongoDB, Inc. - * Public Domain 2008-2014 WiredTiger, Inc. - * - * This is free and unencumbered software released into the public domain. - * - * Anyone is free to copy, modify, publish, use, compile, sell, or - * distribute this software, either in source code form or as a compiled - * binary, for any purpose, commercial or non-commercial, and by any - * means. - * - * In jurisdictions that recognize copyright laws, the author or authors - * of this software dedicate any and all copyright interest in the - * software to the public domain. We make this dedication for the benefit - * of the public at large and to the detriment of our heirs and - * successors. 
We intend this dedication to be an overt act of - * relinquishment in perpetuity of all present and future rights to this - * software under copyright law. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#define THOUSAND (1000ULL) -#define MILLION (1000000ULL) -#define BILLION (1000000000ULL) - -#define NSEC_PER_SEC BILLION -#define USEC_PER_SEC MILLION -#define MSEC_PER_SEC THOUSAND - -#define ns_to_ms(v) ((v) / MILLION) -#define ns_to_sec(v) ((v) / BILLION) -#define ns_to_us(v) ((v) / THOUSAND) - -#define us_to_ms(v) ((v) / THOUSAND) -#define us_to_ns(v) ((v) * THOUSAND) -#define us_to_sec(v) ((v) / MILLION) - -#define ms_to_ns(v) ((v) * MILLION) -#define ms_to_us(v) ((v) * THOUSAND) -#define ms_to_sec(v) ((v) / THOUSAND) - -#define sec_to_ns(v) ((v) * BILLION) -#define sec_to_us(v) ((v) * MILLION) -#define sec_to_ms(v) ((v) * THOUSAND) - -inline std::ostream& -operator<<(std::ostream &os, const timespec &ts) -{ - char oldfill; - std::streamsize oldwidth; - - os << ts.tv_sec << "."; - oldfill = os.fill('0'); - oldwidth = os.width(3); - os << (int)ns_to_ms(ts.tv_nsec); - os.fill(oldfill); - os.width(oldwidth); - return (os); -} - -inline timespec -operator-(const timespec &lhs, const timespec &rhs) -{ - timespec ts; - - if (lhs.tv_nsec < rhs.tv_nsec) { - ts.tv_sec = lhs.tv_sec - rhs.tv_sec - 1; - ts.tv_nsec = lhs.tv_nsec - rhs.tv_nsec + NSEC_PER_SEC; - } else { - ts.tv_sec = lhs.tv_sec - rhs.tv_sec; - ts.tv_nsec = lhs.tv_nsec - rhs.tv_nsec; - } - return (ts); -} - -inline timespec -operator+(const timespec &lhs, const int n) -{ - timespec ts 
= lhs; - ts.tv_sec += n; - return (ts); -} - -inline bool -operator<(const timespec &lhs, const timespec &rhs) -{ - if (lhs.tv_sec == rhs.tv_sec) - return (lhs.tv_nsec < rhs.tv_nsec); - else - return (lhs.tv_sec < rhs.tv_sec); -} - -inline bool -operator>(const timespec &lhs, const timespec &rhs) -{ - if (lhs.tv_sec == rhs.tv_sec) - return (lhs.tv_nsec > rhs.tv_nsec); - else - return (lhs.tv_sec > rhs.tv_sec); -} - -inline bool -operator>=(const timespec &lhs, const timespec &rhs) -{ - return (!(lhs < rhs)); -} - -inline bool -operator<=(const timespec &lhs, const timespec &rhs) -{ - return (!(lhs > rhs)); -} - -inline bool -operator==(const timespec &lhs, int n) -{ - return (lhs.tv_sec == n && lhs.tv_nsec == 0); -} - -inline bool -operator!=(const timespec &lhs, int n) -{ - return (lhs.tv_sec != n || lhs.tv_nsec != 0); -} - -inline timespec & -operator+=(timespec &lhs, const int n) -{ - lhs.tv_sec += n; - return (lhs); -} - -inline bool -operator==(const timespec &lhs, const timespec &rhs) -{ - return (lhs.tv_sec == rhs.tv_sec && lhs.tv_nsec == rhs.tv_nsec); -} - -inline timespec & -operator-=(timespec &lhs, const timespec &rhs) -{ - lhs.tv_sec -= rhs.tv_sec; - lhs.tv_nsec -= rhs.tv_nsec; - if (lhs.tv_nsec < 0) { - lhs.tv_nsec += NSEC_PER_SEC; - lhs.tv_sec -= 1; - } - return (lhs); -} - -inline timespec -ts_add_ms(const timespec &lhs, const uint64_t n) -{ - timespec ts; - - ts.tv_sec = lhs.tv_sec + ms_to_sec(n); - ts.tv_nsec = lhs.tv_nsec + ms_to_ns(n % THOUSAND); - while ((unsigned long)ts.tv_nsec > NSEC_PER_SEC) { - ts.tv_nsec -= NSEC_PER_SEC; - ts.tv_sec++; - } - return (ts); -} - -inline void -ts_assign(timespec &lhs, const timespec &rhs) -{ - lhs.tv_sec = rhs.tv_sec; - lhs.tv_nsec = rhs.tv_nsec; -} - -inline void -ts_clear(timespec &ts) -{ - ts.tv_sec = 0; - ts.tv_nsec = 0; -} - -inline uint64_t -ts_sec(const timespec &ts) -{ - return (ns_to_sec(ts.tv_nsec) + ts.tv_sec); -} - -inline uint64_t -ts_ms(const timespec &ts) -{ - return (ns_to_ms(ts.tv_nsec) + 
sec_to_ms(ts.tv_sec)); -} - -inline uint64_t -ts_us(const timespec &ts) -{ - return (ns_to_us(ts.tv_nsec) + sec_to_us(ts.tv_sec)); -} diff --git a/build_posix/Make.subdirs b/build_posix/Make.subdirs index 5e30b28b3d6..4ecec37ca6c 100644 --- a/build_posix/Make.subdirs +++ b/build_posix/Make.subdirs @@ -45,5 +45,4 @@ test/syscall test/thread # Benchmark programs. -bench/workgen PYTHON bench/wtperf diff --git a/dist/s_string.ok b/dist/s_string.ok index 7c409e0e46d..ce4e9f963b0 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -353,8 +353,6 @@ TORTIOUS TSO TXN TXNC -ThreadList -ThreadListWrapper Timespec Timestamp TryCV @@ -1196,7 +1194,6 @@ txnid txnmin txt typedef -typemaps uB uS ui @@ -1277,7 +1274,6 @@ whitespace wiredTiger wiredtiger workFactor -workgen wrapup writeable writelock diff --git a/dist/s_whitespace b/dist/s_whitespace index 874074dfb50..0de59bc5825 100755 --- a/dist/s_whitespace +++ b/dist/s_whitespace @@ -8,7 +8,6 @@ trap 'rm -f $t' 0 1 2 3 13 15 # into a single line, discard trailing empty lines. whitespace() { - ! head $1 | grep -q 'automatically generated by SWIG' || return sed -e 's/[ ][ ]*$//' < $1 | \ cat -s | \ sed -e '${' -e '/^$/d' -e '}' > $t -- cgit v1.2.1 From 32d9aecda006bac401645679c04fcaa6d855e213 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 11 May 2017 07:52:20 -0400 Subject: WT-3312 encryption btree configuration test (#3413) * WT-3312 encryption btree configuration test Don't check for the metadata file explicitly (it implies we should check other special files too, like the LAS file). If there's no explicit encryption argument, default to the connection's value, that includes the metadata file and the LAS file. * Remove my added line in the comment, it didn't add anything useful. 
--- src/btree/bt_handle.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 687a77aaa65..e4780f1bf42 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -418,15 +418,13 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) WT_RET(__wt_compressor_config(session, &cval, &btree->compressor)); /* - * We do not use __wt_config_gets_none here because "none" - * and the empty string have different meanings. The - * empty string means inherit the system encryption setting - * and "none" means this table is in the clear even if the - * database is encrypted. If this is the metadata handle - * always inherit from the connection. + * We do not use __wt_config_gets_none here because "none" and the empty + * string have different meanings. The empty string means inherit the + * system encryption setting and "none" means this table is in the clear + * even if the database is encrypted. */ WT_RET(__wt_config_gets(session, cfg, "encryption.name", &cval)); - if (WT_IS_METADATA(btree->dhandle) || cval.len == 0) + if (cval.len == 0) btree->kencryptor = conn->kencryptor; else if (WT_STRING_MATCH("none", cval.str, cval.len)) btree->kencryptor = NULL; -- cgit v1.2.1 From 1b13a79675ef4064b8301b0bb64378b2cc642a01 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Thu, 11 May 2017 21:19:55 -0400 Subject: WT-3158 Fix structure layout on Windows. (#3416) We use a pragma on Windows to force a struct to be packed, but were missing the "end" pragma that restores normal layout. The result was that most structs were being packed, leading to poor performance for workloads (particularly when accessing session structures). 
--- dist/s_style | 10 ++++++---- src/include/btmem.h | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/dist/s_style b/dist/s_style index 388a481ef56..54a88bbeb4c 100755 --- a/dist/s_style +++ b/dist/s_style @@ -101,10 +101,12 @@ else cat $t fi - # Alignment directive before "struct". - egrep 'WT_COMPILER_TYPE_ALIGN.*struct' $f > $t - test -s $t && { - echo "$f: compiler alignment direction must precede \"struct\"" + # If we don't have matching pack-begin and pack-end calls, we don't get + # an error, we just get a Windows performance regression. + egrep WT_PACKED_STRUCT $f > $t + cnt=`wc -l < $t` + test `expr "$cnt" % 2` -ne 0 && { + echo "$f: mismatched WT_PACKED_STRUCT_BEGIN/END lines" cat $t } diff --git a/src/include/btmem.h b/src/include/btmem.h index 9db764fa864..6755db81007 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -938,7 +938,7 @@ WT_PACKED_STRUCT_BEGIN(__wt_update) #define WT_UPDATE_MEMSIZE(upd) \ WT_ALIGN(sizeof(WT_UPDATE) + (WT_UPDATE_DELETED_ISSET(upd) || \ WT_UPDATE_RESERVED_ISSET(upd) ? 0 : (upd)->size), 32) -}; +WT_PACKED_STRUCT_END /* * WT_INSERT -- -- cgit v1.2.1 From d2db615982db186e0291f05da9dbe758d36b20d3 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 12 May 2017 08:25:05 -0400 Subject: WT-3158 Fix structure layout on Windows. (#3417) Use awk instead of wc to get a count of lines, awk never includes whitespace in the output. --- dist/s_style | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dist/s_style b/dist/s_style index 54a88bbeb4c..69cf1f667fa 100755 --- a/dist/s_style +++ b/dist/s_style @@ -102,9 +102,10 @@ else fi # If we don't have matching pack-begin and pack-end calls, we don't get - # an error, we just get a Windows performance regression. + # an error, we just get a Windows performance regression. Using awk and + # not wc to ensure there's no whitespace in the assignment. 
egrep WT_PACKED_STRUCT $f > $t - cnt=`wc -l < $t` + cnt=`awk 'BEGIN { line = 0 } { ++line } END { print line }' < $t` test `expr "$cnt" % 2` -ne 0 && { echo "$f: mismatched WT_PACKED_STRUCT_BEGIN/END lines" cat $t -- cgit v1.2.1 From 9b60343ed70a47233bcb90cc7ff2aa256dd272ab Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Thu, 11 May 2017 21:19:55 -0400 Subject: WT-3158 Fix structure layout on Windows. (#3416) We use a pragma on Windows to force a struct to be packed, but were missing the "end" pragma that restores normal layout. The result was that most structs were being packed, leading to poor performance for workloads (particularly when accessing session structures). --- dist/s_style | 10 ++++++---- src/include/btmem.h | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/dist/s_style b/dist/s_style index 388a481ef56..54a88bbeb4c 100755 --- a/dist/s_style +++ b/dist/s_style @@ -101,10 +101,12 @@ else cat $t fi - # Alignment directive before "struct". - egrep 'WT_COMPILER_TYPE_ALIGN.*struct' $f > $t - test -s $t && { - echo "$f: compiler alignment direction must precede \"struct\"" + # If we don't have matching pack-begin and pack-end calls, we don't get + # an error, we just get a Windows performance regression. + egrep WT_PACKED_STRUCT $f > $t + cnt=`wc -l < $t` + test `expr "$cnt" % 2` -ne 0 && { + echo "$f: mismatched WT_PACKED_STRUCT_BEGIN/END lines" cat $t } diff --git a/src/include/btmem.h b/src/include/btmem.h index f1bb08d2699..b1d5df4e9d2 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -933,7 +933,7 @@ WT_PACKED_STRUCT_BEGIN(__wt_update) #define WT_UPDATE_MEMSIZE(upd) \ WT_ALIGN(sizeof(WT_UPDATE) + \ (WT_UPDATE_DELETED_ISSET(upd) ? 0 : (upd)->size), 32) -}; +WT_PACKED_STRUCT_END /* * WT_INSERT -- -- cgit v1.2.1 From 78109ca3fea602885a62fc2872fc553450168149 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 12 May 2017 08:25:05 -0400 Subject: WT-3158 Fix structure layout on Windows. 
(#3417) Use awk instead of wc to get a count of lines, awk never includes whitespace in the output. --- dist/s_style | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dist/s_style b/dist/s_style index 54a88bbeb4c..69cf1f667fa 100755 --- a/dist/s_style +++ b/dist/s_style @@ -102,9 +102,10 @@ else fi # If we don't have matching pack-begin and pack-end calls, we don't get - # an error, we just get a Windows performance regression. + # an error, we just get a Windows performance regression. Using awk and + # not wc to ensure there's no whitespace in the assignment. egrep WT_PACKED_STRUCT $f > $t - cnt=`wc -l < $t` + cnt=`awk 'BEGIN { line = 0 } { ++line } END { print line }' < $t` test `expr "$cnt" % 2` -ne 0 && { echo "$f: mismatched WT_PACKED_STRUCT_BEGIN/END lines" cat $t -- cgit v1.2.1 From 0a0d1562708f78da356f5452d6ec7dbc203d1c2a Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Mon, 15 May 2017 16:24:39 -0400 Subject: WT-3142 Add a workload generator application (#3408) --- bench/workgen/Makefile.am | 30 + bench/workgen/runner/example_simple.py | 31 + bench/workgen/runner/example_txn.py | 29 + bench/workgen/runner/insert_test.py | 94 ++ bench/workgen/runner/multi_btree_heavy_stress.py | 102 ++ bench/workgen/runner/runner/__init__.py | 92 ++ bench/workgen/runner/runner/core.py | 101 ++ bench/workgen/runner/runner/latency.py | 122 ++ bench/workgen/runner/small_btree.py | 27 + bench/workgen/setup.py | 70 + bench/workgen/workgen.cxx | 1605 ++++++++++++++++++++++ bench/workgen/workgen.h | 410 ++++++ bench/workgen/workgen.swig | 233 ++++ bench/workgen/workgen/__init__.py | 42 + bench/workgen/workgen_func.c | 89 ++ bench/workgen/workgen_func.h | 44 + bench/workgen/workgen_int.h | 205 +++ bench/workgen/workgen_time.h | 201 +++ build_posix/Make.subdirs | 6 +- build_posix/configure.ac.in | 1 + build_posix/makemake | 16 +- dist/s_string.ok | 4 + dist/s_whitespace | 1 + 23 files changed, 3550 insertions(+), 5 deletions(-) create mode 100644 
bench/workgen/Makefile.am create mode 100755 bench/workgen/runner/example_simple.py create mode 100644 bench/workgen/runner/example_txn.py create mode 100644 bench/workgen/runner/insert_test.py create mode 100644 bench/workgen/runner/multi_btree_heavy_stress.py create mode 100644 bench/workgen/runner/runner/__init__.py create mode 100644 bench/workgen/runner/runner/core.py create mode 100644 bench/workgen/runner/runner/latency.py create mode 100644 bench/workgen/runner/small_btree.py create mode 100644 bench/workgen/setup.py create mode 100644 bench/workgen/workgen.cxx create mode 100644 bench/workgen/workgen.h create mode 100644 bench/workgen/workgen.swig create mode 100644 bench/workgen/workgen/__init__.py create mode 100644 bench/workgen/workgen_func.c create mode 100644 bench/workgen/workgen_func.h create mode 100644 bench/workgen/workgen_int.h create mode 100644 bench/workgen/workgen_time.h diff --git a/bench/workgen/Makefile.am b/bench/workgen/Makefile.am new file mode 100644 index 00000000000..61512d65319 --- /dev/null +++ b/bench/workgen/Makefile.am @@ -0,0 +1,30 @@ +AM_CPPFLAGS = -I$(top_builddir) +AM_CPPFLAGS += -I$(top_srcdir)/src/include +AM_CPPFLAGS +=-I$(top_srcdir)/test/utility + +PYSRC = $(top_srcdir)/bench/workgen +PYDIRS = -t $(abs_builddir) -I $(abs_top_srcdir):$(abs_top_builddir) -L $(abs_top_builddir)/.libs:$(abs_top_builddir)/bench/workgen/.libs +all-local: _workgen.so libworkgen.la +libworkgen_la_SOURCES = workgen.cxx workgen_func.c +noinst_LTLIBRARIES = libworkgen.la + +# We keep generated Python sources under bench/workgen. 
+$(PYSRC)/workgen_wrap.cxx: $(PYSRC)/workgen.h $(PYSRC)/workgen.swig + (cd $(PYSRC) && \ + $(SWIG) -c++ -python -threads -O -Wall -I$(abs_top_builddir) -outdir ./workgen workgen.swig) + +_workgen.so: $(top_builddir)/libwiredtiger.la $(PYSRC)/workgen_wrap.cxx libworkgen.la $(PYSRC)/workgen.h $(PYSRC)/workgen_time.h + (cd $(PYSRC) && \ + $(PYTHON) setup.py build_ext -f -b $(abs_builddir) $(PYDIRS)) + +install-exec-local: + (cd $(PYSRC) && \ + $(PYTHON) setup.py build_py -d $(abs_builddir)/build && \ + $(PYTHON) setup.py build_ext -f -b $(abs_builddir)/build $(PYDIRS) && \ + $(PYTHON) setup.py install_lib -b $(abs_builddir)/build --skip-build $(PYTHON_INSTALL_ARG)) + +# We build in different places for an install vs running from the tree: +# clean up both. Don't rely on "setup.py clean" -- everything that should +# be removed is created under the build directory. +clean-local: + rm -rf build _workgen.so workgen_wrap.o WT_TEST diff --git a/bench/workgen/runner/example_simple.py b/bench/workgen/runner/example_simple.py new file mode 100755 index 00000000000..de944cbe29e --- /dev/null +++ b/bench/workgen/runner/example_simple.py @@ -0,0 +1,31 @@ +#!/usr/bin/python +from runner import * +from wiredtiger import * +from workgen import * + +def show(tname): + print('') + print('<><><><> ' + tname + ' <><><><>') + c = s.open_cursor(tname, None) + for k,v in c: + print('key: ' + k) + print('value: ' + v) + print('<><><><><><><><><><><><>') + c.close() + +context = Context() +conn = wiredtiger_open("WT_TEST", "create,cache_size=1G") +s = conn.open_session() +tname = 'table:simple' +s.create(tname, 'key_format=S,value_format=S') + +ops = Operation(Operation.OP_INSERT, Table(tname), Key(Key.KEYGEN_APPEND, 10), Value(40)) +thread = Thread(ops) +workload = Workload(context, thread) +workload.run(conn) +show(tname) + +thread = Thread(ops * 5) +workload = Workload(context, thread) +workload.run(conn) +show(tname) diff --git a/bench/workgen/runner/example_txn.py 
b/bench/workgen/runner/example_txn.py new file mode 100644 index 00000000000..ef1d7a93941 --- /dev/null +++ b/bench/workgen/runner/example_txn.py @@ -0,0 +1,29 @@ +#!/usr/bin/python +from runner import * +from wiredtiger import * +from workgen import * + +conn = wiredtiger_open("WT_TEST", "create,cache_size=500MB") +s = conn.open_session() +tname = "table:test" +s.create(tname, 'key_format=S,value_format=S') +table = Table(tname) +table.options.key_size = 20 +table.options.value_size = 100 + +context = Context() +op = Operation(Operation.OP_INSERT, table) +thread = Thread(op * 500000) +pop_workload = Workload(context, thread) +print('populate:') +pop_workload.run(conn) + +opread = Operation(Operation.OP_SEARCH, table) +opwrite = Operation(Operation.OP_INSERT, table) +treader = Thread(opread) +twriter = Thread(txn(opwrite * 2)) +workload = Workload(context, treader * 8 + twriter * 2) +workload.options.run_time = 10 +workload.options.report_interval = 5 +print('transactional write workload:') +workload.run(conn) diff --git a/bench/workgen/runner/insert_test.py b/bench/workgen/runner/insert_test.py new file mode 100644 index 00000000000..30f2818e91e --- /dev/null +++ b/bench/workgen/runner/insert_test.py @@ -0,0 +1,94 @@ +#!/usr/bin/python +from runner import * +from wiredtiger import * +from workgen import * + +def tablename(id): + return "table:test%06d" % id + +def show(tname): + print('') + print('<><><><> ' + tname + ' <><><><>') + c = s.open_cursor(tname, None) + for k,v in c: + print('key: ' + k) + print('value: ' + v) + print('<><><><><><><><><><><><>') + c.close() + +def expectException(expr): + gotit = False + try: + expr() + except BaseException as e: + print('got expected exception: ' + str(e)) + gotit = True + if not gotit: + raise Exception("missing expected exception") + +context = Context() +conn = wiredtiger_open("WT_TEST", "create,cache_size=1G") +s = conn.open_session() +tname0 = tablename(0) +tname1 = tablename(1) +s.create(tname0, 
'key_format=S,value_format=S') +s.create(tname1, 'key_format=S,value_format=S') + +ops = Operation(Operation.OP_INSERT, Table(tname0), Key(Key.KEYGEN_APPEND, 10), Value(100)) +workload = Workload(context, Thread(ops)) + +print('RUN1') +workload.run(conn) +show(tname0) + +# The context has memory of how many keys are in all the tables. +# truncate goes behind context's back, but it doesn't matter for +# an insert-only test. +s.truncate(tname0, None, None) + +# Show how to 'multiply' operations +op = Operation(Operation.OP_INSERT, Table(tname0), Key(Key.KEYGEN_APPEND, 10), Value(100)) +op2 = Operation(Operation.OP_INSERT, Table(tname1), Key(Key.KEYGEN_APPEND, 20), Value(30)) +o = op2 * 10 +print 'op is: ' + str(op) +print 'multiplying op is: ' + str(o) +thread0 = Thread(o + op + op) +workload = Workload(context, thread0) +print('RUN2') +workload.run(conn) +show(tname0) +show(tname1) + +s.truncate(tname0, None, None) +s.truncate(tname1, None, None) + +# operations can be multiplied, added in any combination. 
+op += Operation(Operation.OP_INSERT, Table(tname0), Key(Key.KEYGEN_APPEND, 10), Value(10)) +op *= 2 +op += Operation(Operation.OP_INSERT, Table(tname0), Key(Key.KEYGEN_APPEND, 10), Value(10)) +thread0 = Thread(op * 10 + op2 * 20) +workload = Workload(context, thread0) +print('RUN3') +workload.run(conn) +show(tname0) +show(tname1) + +print('workload is ' + str(workload)) +print('thread0 is ' + str(thread0)) + +def assignit(k, n): + k._size = n + +expectException(lambda: Operation( + Operation.OP_INSERT, Table('foo'), Key(Key.KEYGEN_APPEND, 10))) +# we don't catch this exception here, but in Workload.run() +k = Key(Key.KEYGEN_APPEND, 1) +assignit(k, 30) +assignit(k, 1) # we don't catch this exception here, but in Workload.run() +op = Operation(Operation.OP_INSERT, Table(tname0), k, Value(10)) +workload = Workload(context, Thread(op)) +print('RUN4') +expectException(lambda: workload.run(conn)) + +print('HELP:') +print(workload.options.help()) diff --git a/bench/workgen/runner/multi_btree_heavy_stress.py b/bench/workgen/runner/multi_btree_heavy_stress.py new file mode 100644 index 00000000000..0993f60248d --- /dev/null +++ b/bench/workgen/runner/multi_btree_heavy_stress.py @@ -0,0 +1,102 @@ +#!/usr/bin/python +# Drive a constant high workload through, even if WiredTiger isn't keeping +# up by dividing the workload across a lot of threads. This needs to be +# tuned to the particular machine so the workload is close to capacity in the +# steady state, but not overwhelming. +# +################ +# Note: as a proof of concept for workgen, this matches closely +# bench/wtperf/runner/multi-btree-read-heavy-stress.wtperf . +# Run time, #ops, #threads are ratcheted way down for testing. 
+# +from runner import * +from wiredtiger import * +from workgen import * + +def op_append(ops, op): + if ops == None: + ops = op + else: + ops += op + return ops + +def make_op(optype, table, key, value = None): + if value == None: + return Operation(optype, table, key) + else: + return Operation(optype, table, key, value) + +logkey = Key(Key.KEYGEN_APPEND, 8) ## should be 8 bytes format 'Q' +def operations(optype, tables, key, value = None, ops_per_txn = 0, logtable = None): + txn_list = [] + ops = None + nops = 0 + for table in tables: + ops = op_append(ops, make_op(optype, table, key, value)) + if logtable != None: + ops = op_append(ops, make_op(optype, logtable, logkey, value)) + nops += 1 + if ops_per_txn > 0 and nops % ops_per_txn == 0: + txn_list.append(txn(ops)) + ops = None + if ops_per_txn > 0: + if ops != None: + txn_list.append(txn(ops)) + ops = None + for t in txn_list: + ops = op_append(ops, t) + return ops + +context = Context() +## cache_size=20GB +conn_config="create,cache_size=1GB,session_max=1000,eviction=(threads_min=4,threads_max=8),log=(enabled=false),transaction_sync=(enabled=false),checkpoint_sync=true,checkpoint=(wait=60),statistics=(fast),statistics_log=(json,wait=1)" +table_config="allocation_size=4k,memory_page_max=10MB,prefix_compression=false,split_pct=90,leaf_page_max=32k,internal_page_max=16k,type=file,block_compressor=snappy" +conn_config += extensions_config(['compressors/snappy']) +conn = wiredtiger_open("WT_TEST", conn_config) +s = conn.open_session() + +tables = [] +for i in range(0, 8): + tname = "table:test" + str(i) + s.create(tname, 'key_format=S,value_format=S,' + table_config) + tables.append(Table(tname)) +tname = "table:log" +# TODO: use table_config for the log file? 
+s.create(tname, 'key_format=S,value_format=S,' + table_config) +logtable = Table(tname) + +##icount=200000000 / 8 +icount=20000 +ins_ops = operations(Operation.OP_INSERT, tables, Key(Key.KEYGEN_APPEND, 20), Value(500)) +thread = Thread(ins_ops * icount) +pop_workload = Workload(context, thread) +print('populate:') +pop_workload.run(conn) + +ins_ops = operations(Operation.OP_INSERT, tables, Key(Key.KEYGEN_APPEND, 20), Value(500), 0, logtable) +upd_ops = operations(Operation.OP_UPDATE, tables, Key(Key.KEYGEN_UNIFORM, 20), Value(500), 0, logtable) +read_ops = operations(Operation.OP_SEARCH, tables, Key(Key.KEYGEN_UNIFORM, 20), None, 3) + +ins_thread = Thread(ins_ops) +upd_thread = Thread(upd_ops) +read_thread = Thread(read_ops) +ins_thread.options.throttle = 250 +ins_thread.options.name = "Insert" +upd_thread.options.throttle = 250 +upd_thread.options.name = "Update" +read_thread.options.throttle = 1000 +read_thread.options.name = "Read" +##threads = [ins_thread] * 10 + [upd_thread] * 10 + [read_thread] * 80 +threads = ins_thread * 1 + upd_thread * 1 + read_thread * 2 +workload = Workload(context, threads) +##workload.options.run_time = 3600 +workload.options.run_time = 30 +workload.options.report_interval = 1 +workload.options.sample_interval = 5 +workload.options.sample_rate = 1 +print('heavy stress workload:') +workload.run(conn) + +latency_filename = conn.get_home() + '/latency.out' +print('for latency output, see: ' + latency_filename) +latency.workload_latency(workload, latency_filename) diff --git a/bench/workgen/runner/runner/__init__.py b/bench/workgen/runner/runner/__init__.py new file mode 100644 index 00000000000..67b547bc51b --- /dev/null +++ b/bench/workgen/runner/runner/__init__.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. 
+# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# runner/__init__.py +# Used as a first import by runners, does any common initialization. +from __future__ import print_function + +import os, shutil, sys +thisdir = os.path.dirname(os.path.abspath(__file__)) +workgen_src = os.path.dirname(os.path.dirname(thisdir)) +wt_dir = os.path.dirname(os.path.dirname(workgen_src)) +wt_builddir = os.path.join(wt_dir, 'build_posix') + +def _prepend_env_path(pathvar, s): + last = '' + try: + last = ':' + os.environ[pathvar] + except: + pass + os.environ[pathvar] = s + last + +# Initialize the python path so needed modules can be imported. +# If the path already works, don't change it. +try: + import wiredtiger +except: + # We'll try hard to make the importing work, we'd like to runners + # to be executable directly without having to set environment variables. 
+ sys.path.insert(0, os.path.join(wt_dir, 'lang', 'python')) + sys.path.insert(0, os.path.join(wt_builddir, 'lang', 'python')) + try: + import wiredtiger + except: + # If the .libs directory is not in our library search path, + # we need to set it and retry. However, the dynamic link + # library has already cached its value, our only option is + # to restart the Python interpreter. + if '_workgen_init' not in os.environ: + os.environ['_workgen_init'] = 'true' + dotlibs = os.path.join(wt_builddir, '.libs') + _prepend_env_path('LD_LIBRARY_PATH', dotlibs) + _prepend_env_path('DYLD_LIBRARY_PATH', dotlibs) + py_args = sys.argv + py_args.insert(0, sys.executable) + try: + os.execv(sys.executable, py_args) + except Exception, exception: + print('re-exec failed: ' + str(exception), file=sys.stderr) + print(' exec(' + sys.executable + ', ' + str(py_args) + ')') + print('Try adding "' + dotlibs + '" to the', file=sys.stderr) + print('LD_LIBRARY_PATH environment variable before running ' + \ + 'this program again.', file=sys.stderr) + sys.exit(1) + +try: + import workgen +except: + sys.path.insert(0, os.path.join(workgen_src, 'workgen')) + sys.path.insert(0, os.path.join(wt_builddir, 'bench', 'workgen')) + import workgen + +# Clear out the WT_TEST directory. +shutil.rmtree('WT_TEST', True) +os.mkdir('WT_TEST') + +from .core import txn, extensions_config +from .latency import workload_latency diff --git a/bench/workgen/runner/runner/core.py b/bench/workgen/runner/runner/core.py new file mode 100644 index 00000000000..a0f0d4d77cd --- /dev/null +++ b/bench/workgen/runner/runner/core.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. 
# txn --
#      Mark an operation (and any suboperations) as transactional by
#      attaching a freshly built workgen.Transaction to it.  The same
#      operation object is returned so calls can be nested in expressions.
def txn(op, config=None):
    op._transaction = workgen.Transaction(config)
    return op
# Return the wiredtiger_open extension argument for any needed shared library.
# Called with a list of extensions, e.g.
#    [ 'compressors/snappy', 'encryptors/rotn=config_string' ]
#
# Each entry is "<dir>/<name>" optionally followed by "=<config>".  The
# library is looked up under <builddir>/ext/<dir>/<name>/.libs and exactly one
# libwiredtiger_*.so must match.  Returns '' when no extensions are requested,
# otherwise a string of the form ',extensions=["path"=config,...]'.
# Raises Exception for a malformed name, a missing or ambiguous library, or
# the same extension listed twice with different configuration.
def extensions_config(exts):
    result = ''
    extfiles = {}
    errpfx = 'extensions_config'
    builddir = _wiredtiger_builddir()
    for ext in exts:
        # Split off the optional "=config" suffix; it is re-attached after
        # the quoted library path.
        extconf = ''
        if '=' in ext:
            splits = ext.split('=', 1)
            ext = splits[0]
            extconf = '=' + splits[1]
        splits = ext.split('/')
        if len(splits) != 2:
            # Name the expected form: the placeholder had been lost, leaving
            # the message reading "is not named /".
            raise Exception(errpfx + ": " + ext +
                ": extension is not named <dir>/<name>")
        libname = splits[1]
        dirname = splits[0]
        pat = os.path.join(builddir, 'ext',
            dirname, libname, '.libs', 'libwiredtiger_*.so')
        filenames = glob.glob(pat)
        if len(filenames) == 0:
            raise Exception(errpfx +
                ": " + ext +
                ": no extensions library found matching: " + pat)
        elif len(filenames) > 1:
            raise Exception(errpfx + ": " + ext +
                ": multiple extensions libraries found matching: " + pat)
        complete = '"' + filenames[0] + '"' + extconf
        if ext in extfiles:
            # A repeated extension is fine only if it resolves identically.
            if extfiles[ext] != complete:
                raise Exception(errpfx +
                    ": non-matching extension arguments in " +
                    str(exts))
        else:
            extfiles[ext] = complete
    if len(extfiles) != 0:
        result = ',extensions=[' + ','.join(extfiles.values()) + ']'
    return result
python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# +# runner/latency.py +# Utility functions for showing latency statistics +from __future__ import print_function +import sys + +def _show_buckets(fh, title, mult, buckets, n): + shown = False + s = title + ': ' + for count in range(0, n): + val = buckets[count] + if val != 0: + if shown: + s += ',' + s += str(count*mult) + '=' + str(val) + shown = True + print(s, file=fh) + +def _latency_preprocess(arr, merge): + mx = 0 + cur = 0 + # SWIG arrays have a clunky interface + for i in range(0, arr.__len__()): + if i % merge == 0: + cur = 0 + cur += arr[i] + if cur > mx: + mx = cur + arr.height = mx + +def _latency_plot(box, ch, left, width, arr, merge, scale): + pos = 0 + for x in range(0, width): + t = 0 + for i in range(0, merge): + t += arr[pos] + pos += 1 + nch = scale * t + y = 0 + while nch > 0.0: + box[y][left + x] = ch + nch -= 1.0 + y += 1 + +def _latency_optype(fh, name, ch, t): + if t.ops == 0: + return + if t.latency_ops == 0: + print('**** ' + name + ' operations: ' + str(t.ops), file=fh) + return + print('**** ' + name + ' operations: ' + str(t.ops) + \ + ', latency operations: ' + str(t.latency_ops), file=fh) + print(' avg: ' + str(t.latency/t.latency_ops) + \ + ', min: ' + str(t.min_latency) + ', max: ' + str(t.max_latency), + file=fh) + us = t.us() + ms = t.ms() + sec = t.sec() + _latency_preprocess(us, 40) + _latency_preprocess(ms, 40) + _latency_preprocess(sec, 4) + max_height = max(us.height, ms.height, sec.height) + if max_height == 0: + return + height = 20 # 20 chars high + # a list of a list of characters + box = [list(' ' * 80) for x in range(height)] + scale = (1.0 / (max_height + 1)) * height + _latency_plot(box, ch, 0, 25, us, 40, scale) + _latency_plot(box, ch, 27, 25, ms, 40, scale) + _latency_plot(box, ch, 54, 25, sec, 4, scale) + box.reverse() + for line in box: + print(''.join(line), file=fh) + dash25 = '-' * 25 + print(' '.join([dash25] * 3), file=fh) + print(' 0 - 999 us (40/bucket) 1 - 999 ms (40/bucket) ' + \ + '1 - 99 sec 
# workload_latency --
#      Print the latency report for every operation type tracked in
#      workload.stats.  Output goes to the file named by outfilename (created
#      or truncated) when one is given, otherwise to standard output.
def workload_latency(workload, outfilename = None):
    if outfilename:
        fh = open(outfilename, 'w')
    else:
        fh = sys.stdout
    try:
        _latency_optype(fh, 'insert', 'I', workload.stats.insert)
        _latency_optype(fh, 'read', 'R', workload.stats.read)
        _latency_optype(fh, 'remove', 'X', workload.stats.remove)
        _latency_optype(fh, 'update', 'U', workload.stats.update)
        _latency_optype(fh, 'truncate', 'T', workload.stats.truncate)
        _latency_optype(fh, 'not found', 'N', workload.stats.not_found)
    finally:
        # Close (and so flush) a file we opened ourselves, even if reporting
        # fails part way; never close the caller's stdout.
        if fh is not sys.stdout:
            fh.close()
+# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +from __future__ import print_function +import re, os, sys +from distutils.core import setup, Extension + +# OS X hack: turn off the Universal binary support that is built into the +# Python build machinery, just build for the default CPU architecture. 
# OS X hack: turn off the Universal binary support that is built into the
# Python build machinery, just build for the default CPU architecture.
if 'ARCHFLAGS' not in os.environ:
    os.environ['ARCHFLAGS'] = ''

# Suppress warnings building SWIG generated code
extra_cflags = [ '-w', '-Wno-sign-conversion', '-I../../src/include', \
    '-I../../test/utility']

dir = os.path.dirname(__file__)
abs_dir = os.path.dirname(os.path.abspath(__file__))

# This script only works from <wt_dir>/bench/workgen; derive the top of the
# tree from that location.
if abs_dir.endswith(os.sep + os.path.join('bench', 'workgen')):
    wt_dir = os.path.dirname(os.path.dirname(abs_dir))
else:
    print(os.path.basename(__file__) + ": running from unknown dir", \
        file=sys.stderr)
    sys.exit(1)

build_dir = os.path.join(wt_dir, 'build_posix')

# Read the version information from the RELEASE_INFO file.  Each matching
# NAME=value line is executed as a Python assignment, defining the
# WIREDTIGER_VERSION_* names used below.  The file is closed as soon as it
# has been scanned.
with open(os.path.join(dir, '..', '..', 'RELEASE_INFO')) as release_info:
    for l in release_info:
        if re.match(r'WIREDTIGER_VERSION_(?:MAJOR|MINOR|PATCH)=', l):
            exec(l)

wt_ver = '%d.%d' % (WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR)

setup(name='workgen', version=wt_ver,
    ext_modules=[Extension('_workgen',
        [os.path.join(dir, 'workgen_wrap.cxx')],
        libraries=['wiredtiger', 'pthread', 'workgen'],
        extra_compile_args=extra_cflags,
    )],
    package_dir={'' : dir},
    packages=['workgen'],
)
// Number of buckets kept for each latency histogram resolution.
#define LATENCY_US_BUCKETS 1000
#define LATENCY_MS_BUCKETS 1000
#define LATENCY_SEC_BUCKETS 100

#define THROTTLE_PER_SEC 20     // times per sec we will throttle

// Smaller / larger of two values.  Arguments are evaluated more than once,
// so they must be free of side effects.
#define MIN(a, b)               ((a) < (b) ? (a) : (b))
#define MAX(a, b)               ((a) < (b) ? (b) : (a))
// A timespec as fractional seconds.  Both uses of the argument are
// parenthesized so that expressions such as *ptr expand correctly.
#define TIMESPEC_DOUBLE(ts)                                             \
    ((double)(ts).tv_sec + (ts).tv_nsec * 0.000000001)
// n as a percentage of total; 0 when total is 0.
#define PCT(n, total)           ((total) == 0 ? 0 : ((n) * 100) / (total))
// Operations per second over the elapsed time ts, truncated to int; 0 when
// ts compares equal to 0.
#define OPS_PER_SEC(ops, ts)    (int) ((ts) == 0 ? 0.0 :                \
    (ops) / TIMESPEC_DOUBLE(ts))
(container)[idx] : (dfault)) + +#define CROSS_USAGE(a, b) \ + (((a & USAGE_READ) != 0 && (b & USAGE_WRITE) != 0) || \ + ((a & USAGE_WRITE) != 0 && (b & USAGE_READ) != 0)) + +#define ASSERT(cond) \ + do { \ + if (!(cond)) { \ + fprintf(stderr, "%s:%d: ASSERT failed: %s\n", \ + __FILE__, __LINE__, #cond); \ + abort(); \ + } \ + } while(0) + +#define THROW_ERRNO(e, args) \ + do { \ + std::stringstream __sstm; \ + __sstm << args; \ + WorkgenException __wge(e, __sstm.str().c_str()); \ + throw(__wge); \ + } while(0) + +#define THROW(args) THROW_ERRNO(0, args) + +#define VERBOSE(runner, args) \ + do { \ + if ((runner)._context->_verbose) \ + std::cout << args << std::endl; \ + } while(0) + +#define OP_HAS_VALUE(op) \ + ((op)->_optype == Operation::OP_INSERT || \ + (op)->_optype == Operation::OP_UPDATE) + +namespace workgen { + +// The number of contexts. Normally there is one context created, but it will +// be possible to use several eventually. More than one is not yet +// implemented, but we must at least guard against the caller creating more +// than one. +static uint32_t context_count = 0; + +static void *thread_runner_main(void *arg) { + ThreadRunner *runner = (ThreadRunner *)arg; + try { + runner->_errno = runner->run(); + } catch (WorkgenException &wge) { + runner->_exception = wge; + } + return (NULL); +} + +static void *monitor_main(void *arg) { + Monitor *monitor = (Monitor *)arg; + try { + monitor->_errno = monitor->run(); + } catch (WorkgenException &wge) { + monitor->_exception = wge; + } + return (NULL); +} + +// Exponentiate (like the pow function), except that it returns an exact +// integral 64 bit value, and if it overflows, returns the maximum possible +// value for the return type. 
// Raise base to the power exp using exact 64-bit unsigned arithmetic.
// Returns 1 when exp is zero or negative.  If the exact result does not fit
// in 64 bits, returns UINT64_MAX.  The base is converted to uint64_t before
// multiplying (as the original multiplication did), so a negative base acts
// as its large unsigned equivalent.
static uint64_t power64(int base, int exp) {
    const uint64_t factor = (uint64_t)base;
    uint64_t result = 1;

    for (int i = 0; i < exp; i++) {
        // Test for overflow before multiplying.  Comparing the wrapped
        // product with the previous value is not reliable: the product can
        // wrap around and still be larger (3^41 does exactly that).
        if (factor != 0 && result > UINT64_MAX / factor)
            return UINT64_MAX;
        result *= factor;
    }
    return result;
}
*/ + break; + if (indent != NULL) + sstm << indent; + std::string line(p, (size_t)(t - p)); + sstm << line << std::endl; + } + if (*p != '\0') { + if (indent != NULL) + sstm << indent; + sstm << p << std::endl; + } +} + +std::string OptionsList::help() const { + std::stringstream sstm; + for (std::map::const_iterator i = + _option_map.begin(); i != _option_map.end(); i++) { + sstm << i->first << " (" << i->second.first << ")" << std::endl; + pretty_print(i->second.second.c_str(), "\t", sstm); + } + return sstm.str(); +} + +std::string OptionsList::help_description(const char *option_name) const { + const std::string key(option_name); + if (_option_map.count(key) == 0) + return (std::string("")); + else + return (_option_map.find(key)->second.second); +} + +std::string OptionsList::help_type(const char *option_name) const { + const std::string key(option_name); + if (_option_map.count(key) == 0) + return std::string(""); + else + return (_option_map.find(key)->second.first); +} + +Context::Context() : _verbose(false), _internal(new ContextInternal()) {} +Context::~Context() { delete _internal; } +Context& Context::operator=(const Context &other) { + _verbose = other._verbose; + *_internal = *other._internal; + return (*this); +} + +ContextInternal::ContextInternal() : _tint(), _table_names(), + _recno(NULL), _recno_alloced(0), _tint_last(0), _context_count(0) { + uint32_t count; + if ((count = workgen_atomic_add32(&context_count, 1)) != 1) + THROW("multiple Contexts not supported"); + _context_count = count; +} + +ContextInternal::~ContextInternal() { + if (_recno != NULL) + delete _recno; +} + +int ContextInternal::create_all() { + if (_recno_alloced != _tint_last) { + // The array references are 1-based, we'll waste one entry. 
+ uint64_t *new_recno = new uint64_t[_tint_last + 1]; + memcpy(new_recno, _recno, sizeof(uint64_t) * _recno_alloced); + memset(&new_recno[_recno_alloced], 0, + sizeof(uint64_t) * (_tint_last - _recno_alloced + 1)); + delete _recno; + _recno = new_recno; + _recno_alloced = _tint_last; + } + return (0); +} + +Monitor::Monitor(WorkloadRunner &wrunner) : + _errno(0), _exception(), _wrunner(wrunner), _stop(false), _handle() {} +Monitor::~Monitor() {} + +int Monitor::run() { + struct timespec t; + struct tm *tm, _tm; + char time_buf[64]; + Stats prev_totals; + WorkloadOptions *options = &_wrunner._workload->options; + uint64_t latency_max = (uint64_t)options->max_latency; + + (*_out) << "#time," + << "totalsec," + << "read ops per second," + << "insert ops per second," + << "update ops per second," + << "checkpoints," + << "read average latency(uS)," + << "read minimum latency(uS)," + << "read maximum latency(uS)," + << "insert average latency(uS)," + << "insert min latency(uS)," + << "insert maximum latency(uS)," + << "update average latency(uS)," + << "update min latency(uS)," + << "update maximum latency(uS)" + << std::endl; + + Stats prev_interval; + while (!_stop) { + for (int i = 0; i < options->sample_interval && !_stop; i++) + sleep(1); + if (_stop) + break; + + workgen_epoch(&t); + tm = localtime_r(&t.tv_sec, &_tm); + (void)strftime(time_buf, sizeof(time_buf), "%b %d %H:%M:%S", tm); + + Stats new_totals(true); + for (std::vector::iterator tr = + _wrunner._trunners.begin(); tr != _wrunner._trunners.end(); tr++) + new_totals.add(tr->_stats, true); + Stats interval(new_totals); + interval.subtract(prev_totals); + interval.smooth(prev_interval); + + int interval_secs = options->sample_interval; + uint64_t cur_reads = interval.read.ops / interval_secs; + uint64_t cur_inserts = interval.insert.ops / interval_secs; + uint64_t cur_updates = interval.update.ops / interval_secs; + + uint64_t totalsec = ts_sec(t - _wrunner._start); + (*_out) << time_buf + << "," << 
totalsec + << "," << cur_reads + << "," << cur_inserts + << "," << cur_updates + << "," << 'N' // checkpoint in progress + << "," << interval.read.average_latency() + << "," << interval.read.min_latency + << "," << interval.read.max_latency + << "," << interval.insert.average_latency() + << "," << interval.insert.min_latency + << "," << interval.insert.max_latency + << "," << interval.update.average_latency() + << "," << interval.update.min_latency + << "," << interval.update.max_latency + << std::endl; + + uint64_t read_max = interval.read.max_latency; + uint64_t insert_max = interval.read.max_latency; + uint64_t update_max = interval.read.max_latency; + + if (latency_max != 0 && + (read_max > latency_max || insert_max > latency_max || + update_max > latency_max)) { + std::cerr << "WARNING: max latency exceeded:" + << " threshold " << latency_max + << " read max " << read_max + << " insert max " << insert_max + << " update max " << update_max << std::endl; + } + + prev_interval.assign(interval); + prev_totals.assign(new_totals); + } + return (0); +} + +ThreadRunner::ThreadRunner() : + _errno(0), _exception(), _thread(NULL), _context(NULL), _icontext(NULL), + _workload(NULL), _wrunner(NULL), _rand_state(NULL), + _throttle(NULL), _throttle_ops(0), _throttle_limit(0), + _in_transaction(false), _number(0), _stats(false), _table_usage(), + _cursors(NULL), _stop(false), _session(NULL), _keybuf(NULL), + _valuebuf(NULL), _repeat(false) { +} + +ThreadRunner::~ThreadRunner() { + free_all(); +} + +int ThreadRunner::create_all(WT_CONNECTION *conn) { + size_t keysize, valuesize; + + WT_RET(close_all()); + ASSERT(_session == NULL); + WT_RET(conn->open_session(conn, NULL, NULL, &_session)); + _table_usage.clear(); + _stats.track_latency(_workload->options.sample_interval > 0); + WT_RET(workgen_random_alloc(_session, &_rand_state)); + _throttle_ops = 0; + _throttle_limit = 0; + _in_transaction = 0; + keysize = 1; + valuesize = 1; + op_create_all(&_thread->_op, keysize, 
valuesize); + _keybuf = new char[keysize]; + _valuebuf = new char[valuesize]; + _keybuf[keysize - 1] = '\0'; + _valuebuf[valuesize - 1] = '\0'; + return (0); +} + +int ThreadRunner::open_all() { + typedef WT_CURSOR *WT_CURSOR_PTR; + if (_cursors != NULL) + delete _cursors; + _cursors = new WT_CURSOR_PTR[_icontext->_tint_last + 1]; + memset(_cursors, 0, sizeof (WT_CURSOR *) * (_icontext->_tint_last + 1)); + for (std::map::iterator i = _table_usage.begin(); + i != _table_usage.end(); i++) { + uint32_t tindex = i->first; + const char *uri = _icontext->_table_names[tindex].c_str(); + WT_RET(_session->open_cursor(_session, uri, NULL, NULL, + &_cursors[tindex])); + } + return (0); +} + +int ThreadRunner::close_all() { + if (_throttle != NULL) { + delete _throttle; + _throttle = NULL; + } + if (_session != NULL) { + WT_RET(_session->close(_session, NULL)); + _session = NULL; + } + free_all(); + return (0); +} + +void ThreadRunner::free_all() { + if (_rand_state != NULL) { + workgen_random_free(_rand_state); + _rand_state = NULL; + } + if (_cursors != NULL) { + delete _cursors; + _cursors = NULL; + } + if (_keybuf != NULL) { + delete _keybuf; + _keybuf = NULL; + } + if (_valuebuf != NULL) { + delete _valuebuf; + _valuebuf = NULL; + } +} + +int ThreadRunner::cross_check(std::vector &runners) { + std::map usage; + + // Determine which tables have cross usage + for (std::vector::iterator r = runners.begin(); + r != runners.end(); r++) { + for (std::map::iterator i = r->_table_usage.begin(); + i != r->_table_usage.end(); i++) { + uint32_t tindex = i->first; + uint32_t thisusage = i->second; + uint32_t curusage = CONTAINER_VALUE(usage, tindex, 0); + if (CROSS_USAGE(curusage, thisusage)) + curusage |= USAGE_MIXED; + usage[tindex] = curusage; + } + } + for (std::map::iterator i = usage.begin(); + i != usage.end(); i++) { + if ((i->second & USAGE_MIXED) != 0) { + for (std::vector::iterator r = runners.begin(); + r != runners.end(); r++) { + r->_table_usage[i->first] |= 
USAGE_MIXED; + } + } + } + return (0); +} + +int ThreadRunner::run() { + WT_DECL_RET; + ThreadOptions *options = &_thread->options; + std::string name = options->name; + + VERBOSE(*this, "thread " << name << " running"); + if (options->throttle != 0) { + _throttle = new Throttle(*this, options->throttle, + options->throttle_burst); + } + for (int cnt = 0; !_stop && (_repeat || cnt < 1) && ret == 0; cnt++) + WT_ERR(op_run(&_thread->_op)); + +err: +#ifdef _DEBUG + { + std::string messages = this->get_debug(); + if (!messages.empty()) + std::cerr << "DEBUG (thread " << name << "): " + << messages << std::endl; + } +#endif + if (ret != 0) + std::cerr << "thread " << name << " failed err=" << ret << std::endl; + VERBOSE(*this, "thread " << name << "finished"); + return (ret); +} + +void ThreadRunner::get_static_counts(Stats &stats) { + _thread->_op.get_static_counts(stats, 1); +} + +void ThreadRunner::op_create_all(Operation *op, size_t &keysize, + size_t &valuesize) { + tint_t tint; + + op->size_check(); + if (op->_optype != Operation::OP_NONE) { + op->kv_compute_max(true); + if (OP_HAS_VALUE(op)) + op->kv_compute_max(false); + op->kv_size_buffer(true, keysize); + op->kv_size_buffer(false, valuesize); + + // Note: to support multiple contexts we'd need a generation + // count whenever we execute. + if (op->_table._internal->_context_count != 0 && + op->_table._internal->_context_count != _icontext->_context_count) + THROW("multiple Contexts not supported"); + if ((tint = op->_table._internal->_tint) == 0) { + std::string uri = op->_table._uri; + + // We are single threaded in this function, so do not have + // to worry about locking. + if (_icontext->_tint.count(uri) == 0) { + // TODO: don't use atomic add, it's overkill. 
+ tint = workgen_atomic_add32(&_icontext->_tint_last, 1); + _icontext->_tint[uri] = tint; + _icontext->_table_names[tint] = uri; + } else + tint = _icontext->_tint[uri]; + op->_table._internal->_tint = tint; + } + uint32_t usage_flags = CONTAINER_VALUE(_table_usage, + op->_table._internal->_tint, 0); + if (op->_optype == Operation::OP_SEARCH) + usage_flags |= ThreadRunner::USAGE_READ; + else + usage_flags |= ThreadRunner::USAGE_WRITE; + _table_usage[op->_table._internal->_tint] = usage_flags; + } + if (op->_group != NULL) + for (std::vector::iterator i = op->_group->begin(); + i != op->_group->end(); i++) + op_create_all(&*i, keysize, valuesize); +} + +uint64_t ThreadRunner::op_get_key_recno(Operation *op, tint_t tint) { + uint64_t recno_count; + uint32_t rand; + + recno_count = _icontext->_recno[tint]; + if (recno_count == 0) + // The file has no entries, returning 0 forces a WT_NOTFOUND return. + return (0); + rand = workgen_random(_rand_state); + return (rand % recno_count + 1); // recnos are one-based. +} + +int ThreadRunner::op_run(Operation *op) { + Track *track; + tint_t tint = op->_table._internal->_tint; + WT_CURSOR *cursor = _cursors[tint]; + WT_DECL_RET; + uint64_t recno; + bool measure_latency; + + recno = 0; + track = NULL; + if (_throttle != NULL) { + if (_throttle_ops >= _throttle_limit && !_in_transaction) { + WT_ERR(_throttle->throttle(_throttle_ops, + &_throttle_limit)); + _throttle_ops = 0; + } + if (op->_optype != Operation::OP_NONE) + ++_throttle_ops; + } + + // A potential race: thread1 is inserting, and increments + // Context->_recno[] for fileX.wt. thread2 is doing one of + // remove/search/update and grabs the new value of Context->_recno[] + // for fileX.wt. thread2 randomly chooses the highest recno (which + // has not yet been inserted by thread1), and when it accesses + // the record will get WT_NOTFOUND. It should be somewhat rare + // (and most likely when the threads are first beginning). 
Any + // WT_NOTFOUND returns are allowed and get their own statistic bumped. + switch (op->_optype) { + case Operation::OP_INSERT: + track = &_stats.insert; + recno = workgen_atomic_add64(&_icontext->_recno[tint], 1); + break; + case Operation::OP_REMOVE: + track = &_stats.remove; + recno = op_get_key_recno(op, tint); + break; + case Operation::OP_SEARCH: + track = &_stats.read; + recno = op_get_key_recno(op, tint); + break; + case Operation::OP_UPDATE: + track = &_stats.update; + recno = op_get_key_recno(op, tint); + break; + case Operation::OP_NONE: + recno = 0; + break; + } + + measure_latency = track != NULL && track->ops != 0 && + track->track_latency() && + (track->ops % _workload->options.sample_rate == 0); + + timespec start; + if (measure_latency) + workgen_epoch(&start); + + if (op->_transaction != NULL) { + if (_in_transaction) + THROW("nested transactions not supported"); + _session->begin_transaction(_session, + op->_transaction->_begin_config.c_str()); + _in_transaction = true; + } + if (op->_optype != Operation::OP_NONE) { + op->kv_gen(true, recno, _keybuf); + cursor->set_key(cursor, _keybuf); + if (OP_HAS_VALUE(op)) { + op->kv_gen(false, recno, _valuebuf); + cursor->set_value(cursor, _valuebuf); + } + switch (op->_optype) { + case Operation::OP_INSERT: + WT_ERR(cursor->insert(cursor)); + break; + case Operation::OP_REMOVE: + WT_ERR_NOTFOUND_OK(cursor->remove(cursor)); + break; + case Operation::OP_SEARCH: + ret = cursor->search(cursor); + break; + case Operation::OP_UPDATE: + WT_ERR_NOTFOUND_OK(cursor->update(cursor)); + break; + default: + ASSERT(false); + } + if (ret != 0) { + track = &_stats.not_found; + ret = 0; // WT_NOTFOUND allowed. 
+ } + cursor->reset(cursor); + } + if (measure_latency) { + timespec stop; + workgen_epoch(&stop); + track->incr_with_latency(ts_us(stop - start)); + } else if (track != NULL) + track->incr(); + + if (op->_group != NULL) + for (int count = 0; !_stop && count < op->_repeatgroup; count++) + for (std::vector::iterator i = op->_group->begin(); + i != op->_group->end(); i++) + WT_ERR(op_run(&*i)); +err: + if (op->_transaction != NULL) { + if (ret != 0 || op->_transaction->_rollback) + WT_TRET(_session->rollback_transaction(_session, NULL)); + else + ret = _session->commit_transaction(_session, + op->_transaction->_commit_config.c_str()); + _in_transaction = false; + } + return (ret); +} + +#ifdef _DEBUG +std::string ThreadRunner::get_debug() { + return (_debug_messages.str()); +} +#endif + +Throttle::Throttle(ThreadRunner &runner, double throttle, + double throttle_burst) : _runner(runner), _throttle(throttle), + _burst(throttle_burst), _next_div(), _ops_delta(0), _ops_prev(0), + _ops_per_div(0), _ms_per_div(0), _started(false) { + ts_clear(_next_div); + _ms_per_div = ceill(1000.0 / THROTTLE_PER_SEC); + _ops_per_div = ceill(_throttle / THROTTLE_PER_SEC); +} + +Throttle::~Throttle() {} + +// Given a random 32-bit value, return a float value equally distributed +// between -1.0 and 1.0. +static float rand_signed(uint32_t r) { + int sign = ((r & 0x1) == 0 ? 1 : -1); + return (((float)r * sign) / UINT32_MAX); +} + +// Each time throttle is called, we sleep and return a number of operations to +// perform next. To implement this we keep a time calculation in _next_div set +// initially to the current time + 1/THROTTLE_PER_SEC. Each call to throttle +// advances _next_div by 1/THROTTLE_PER_SEC, and if _next_div is in the future, +// we sleep for the difference between the _next_div and the current_time. We +// always return (Thread.options.throttle / THROTTLE_PER_SEC) as the number of +// operations. 
+//
+// The only variation is that the amount of individual sleeps is modified by a
+// random amount (which varies more widely as Thread.options.throttle_burst is
+// greater). This has the effect of randomizing how much clumping happens, and
+// ensures that multiple threads aren't executing in lock step.
+//
+int Throttle::throttle(uint64_t op_count, uint64_t *op_limit) {
+    uint64_t ops;
+    int64_t sleep_ms;
+    timespec now;
+
+    workgen_epoch(&now);
+    DEBUG_CAPTURE(_runner, "throttle: ops=" << op_count);
+    if (!_started) {
+        // First call: only establish the first deadline, never sleep.
+        _next_div = ts_add_ms(now, _ms_per_div);
+        _started = true;
+    } else {
+        // Remember how far the caller overshot the limit handed out by the
+        // previous call; the overshoot is charged against the next limit.
+        _ops_delta += (op_count - _ops_prev);
+        if (now < _next_div) {
+            sleep_ms = ts_ms(_next_div - now);
+            sleep_ms += (_ms_per_div * _burst *
+              rand_signed(workgen_random(_runner._rand_state)));
+            if (sleep_ms > 0) {
+                DEBUG_CAPTURE(_runner, ", sleep=" << sleep_ms);
+                usleep((useconds_t)ms_to_us(sleep_ms));
+            }
+        }
+        _next_div = ts_add_ms(_next_div, _ms_per_div);
+    }
+    ops = _ops_per_div;
+    if (_ops_delta < (int64_t)ops) {
+        ops -= _ops_delta;
+        _ops_delta = 0;
+    } else {
+        _ops_delta -= ops;
+        ops = 0;
+    }
+    *op_limit = ops;
+    _ops_prev = ops;
+    DEBUG_CAPTURE(_runner, ", return=" << ops << std::endl);
+    return (0);
+}
+
+// Register the per-thread options together with their help text.
+ThreadOptions::ThreadOptions() : name(), throttle(0.0), throttle_burst(1.0),
+    _options() {
+    _options.add_string("name", name, "name of the thread");
+    _options.add_double("throttle", throttle,
+      "Limit to this number of operations per second");
+    _options.add_double("throttle_burst", throttle_burst,
+      "Changes characteristic of throttling from smooth (0.0) "
+      "to having large bursts with lulls (10.0 or larger)");
+}
+ThreadOptions::ThreadOptions(const ThreadOptions &other) :
+    name(other.name), throttle(other.throttle),
+    throttle_burst(other.throttle_burst), _options(other._options) {}
+ThreadOptions::~ThreadOptions() {}
+
+// Append a copy of every thread in "other" to this list.
+void
+ThreadListWrapper::extend(const ThreadListWrapper &other) {
+    for (std::vector<Thread>::const_iterator i = other._threads.begin();
+      i != other._threads.end(); i++)
+        _threads.push_back(*i);
+}
+
+// Append a copy of a single thread to this list.
+void
+ThreadListWrapper::append(const Thread &t) {
+    _threads.push_back(t);
+}
+
+// Repeat the current contents so the list holds n copies of it; n == 0
+// empties the list.
+void
+ThreadListWrapper::multiply(const int n) {
+    if (n == 0) {
+        _threads.clear();
+    } else {
+        // Work from a snapshot: extend() grows _threads while iterating.
+        std::vector<Thread> copy(_threads);
+        for (int cnt = 1; cnt < n; cnt++)
+            extend(copy);
+    }
+}
+
+Thread::Thread() : options(), _op() {
+}
+
+Thread::Thread(const Operation &op) : options(), _op(op) {
+}
+
+Thread::Thread(const Thread &other) : options(other.options), _op(other._op) {
+}
+
+Thread::~Thread() {
+}
+
+// Write a multi-line description of the thread and its operation to os.
+void Thread::describe(std::ostream &os) const {
+    os << "Thread: [" << std::endl;
+    _op.describe(os); os << std::endl;
+    os << "]";
+}
+
+Operation::Operation() :
+    _optype(OP_NONE), _table(), _key(), _value(), _transaction(NULL),
+    _group(NULL), _repeatgroup(0),
+    _keysize(0), _valuesize(0), _keymax(0), _valuemax(0) {
+}
+
+Operation::Operation(OpType optype, Table table, Key key, Value value) :
+    _optype(optype), _table(table), _key(key), _value(value),
+    _transaction(NULL), _group(NULL), _repeatgroup(0),
+    _keysize(0), _valuesize(0), _keymax(0), _valuemax(0) {
+    size_check();
+}
+
+Operation::Operation(OpType optype, Table table, Key key) :
+    _optype(optype), _table(table), _key(key), _value(), _transaction(NULL),
+    _group(NULL), _repeatgroup(0),
+    _keysize(0), _valuesize(0), _keymax(0), _valuemax(0) {
+    size_check();
+}
+
+Operation::Operation(OpType optype, Table table) :
+    _optype(optype), _table(table), _key(), _value(), _transaction(NULL),
+    _group(NULL), _repeatgroup(0),
+    _keysize(0), _valuesize(0), _keymax(0), _valuemax(0) {
+    size_check();
+}
+
+Operation::Operation(const Operation &other) :
+    _optype(other._optype), _table(other._table), _key(other._key),
+    _value(other._value), _transaction(other._transaction),
+    _group(other._group), _repeatgroup(other._repeatgroup),
+    _keysize(other._keysize), _valuesize(other._valuesize),
+    _keymax(other._keymax), _valuemax(other._valuemax) {
+    // Creation and destruction of _group and _transaction is managed
+    // by Python.
+}
+
+Operation::~Operation() {
+    // Creation and destruction of _group, _transaction is managed by Python.
+}
+
+// Member-wise assignment; the _group and _transaction pointers are shared
+// with "other", not duplicated.
+Operation& Operation::operator=(const Operation &other) {
+    _optype = other._optype;
+    _table = other._table;
+    _key = other._key;
+    _value = other._value;
+    _transaction = other._transaction;
+    _group = other._group;
+    _repeatgroup = other._repeatgroup;
+    _keysize = other._keysize;
+    _valuesize = other._valuesize;
+    _keymax = other._keymax;
+    _valuemax = other._valuemax;
+    return (*this);
+}
+
+// Write a one-line description of the operation, recursing into the
+// operations of its group (if any).
+void Operation::describe(std::ostream &os) const {
+    os << "Operation: " << _optype;
+    if (_optype != OP_NONE) {
+        os << ", "; _table.describe(os);
+        os << ", "; _key.describe(os);
+        os << ", "; _value.describe(os);
+    }
+    if (_transaction != NULL) {
+        os << ", ["; _transaction->describe(os); os << "]";
+    }
+    if (_group != NULL) {
+        os << ", group[" << _repeatgroup << "]: {";
+        bool first = true;
+        for (std::vector<Operation>::const_iterator i = _group->begin();
+          i != _group->end(); i++) {
+            if (!first)
+                os << "}, {";
+            i->describe(os);
+            first = false;
+        }
+        os << "}";
+    }
+}
+
+// Add "multiplier" to the counter matching this operation's type, then
+// recurse into the group with the multiplier scaled by _repeatgroup.
+void Operation::get_static_counts(Stats &stats, int multiplier) {
+    switch (_optype) {
+    case OP_NONE:
+        break;
+    case OP_INSERT:
+        stats.insert.ops += multiplier;
+        break;
+    case OP_REMOVE:
+        stats.remove.ops += multiplier;
+        break;
+    case OP_SEARCH:
+        stats.read.ops += multiplier;
+        break;
+    case OP_UPDATE:
+        stats.update.ops += multiplier;
+        break;
+    default:
+        ASSERT(false);
+    }
+    if (_group != NULL)
+        for (std::vector<Operation>::iterator i = _group->begin();
+          i != _group->end(); i++)
+            i->get_static_counts(stats, multiplier * _repeatgroup);
+}
+
+// Resolve the effective key (iskey) or value size, falling back to the
+// table's default when the Key/Value size is 0, and record the largest
+// number that kv_gen() will accept for that size.
+void Operation::kv_compute_max(bool iskey) {
+    uint64_t max;
+    int size;
+
+    size = iskey ? _key._size : _value._size;
+    if (size == 0)
+        size = iskey ? _table.options.key_size : _table.options.value_size;
+
+    if (iskey && size < 2)
+        THROW("Key.size too small for table '" << _table._uri << "'");
+    if (!iskey && size < 1)
+        THROW("Value.size too small for table '" << _table._uri << "'");
+
+    if (size > 1)
+        max = power64(10, (size - 1)) - 1;
+    else
+        max = 0;
+
+    if (iskey) {
+        _keysize = size;
+        _keymax = max;
+    } else {
+        _valuesize = size;
+        _valuemax = max;
+    }
+}
+
+// Raise maxsize to this operation's key (iskey) or value size if larger.
+void Operation::kv_size_buffer(bool iskey, size_t &maxsize) const {
+    if (iskey) {
+        if ((size_t)_keysize > maxsize)
+            maxsize = _keysize;
+    } else {
+        if ((size_t)_valuesize > maxsize)
+            maxsize = _valuesize;
+    }
+}
+
+// Format n into result as a key (iskey) or value of the precomputed size;
+// throws when n exceeds the maximum computed by kv_compute_max().
+void Operation::kv_gen(bool iskey, uint64_t n, char *result) const {
+    uint64_t max;
+    int size;
+
+    size = iskey ? _keysize : _valuesize;
+    max = iskey ? _keymax : _valuemax;
+    if (n > max)
+        THROW((iskey ? "Key" : "Value") << " (" << n
+          << ") too large for size (" << size << ")");
+    workgen_u64_to_string_zf(n, result, size);
+}
+
+// Throw unless a key size (and, for operations that carry a value, a value
+// size) is available from either the Key/Value or the table options.
+void Operation::size_check() const {
+    if (_optype != OP_NONE && _key._size == 0 && _table.options.key_size == 0)
+        THROW("operation requires a key size");
+    if (OP_HAS_VALUE(this) && _value._size == 0 &&
+      _table.options.value_size == 0)
+        THROW("operation requires a value size");
+}
+
+Track::Track(bool latency_tracking) : ops(0), latency_ops(0), latency(0),
+    min_latency(0), max_latency(0), us(NULL), ms(NULL), sec(NULL) {
+    track_latency(latency_tracking);
+}
+
+// Deep copy: the latency buckets are duplicated, not shared.
+Track::Track(const Track &other) : ops(other.ops),
+    latency_ops(other.latency_ops), latency(other.latency),
+    min_latency(other.min_latency), max_latency(other.max_latency),
+    us(NULL), ms(NULL), sec(NULL) {
+    if (other.us != NULL) {
+        us = new uint32_t[LATENCY_US_BUCKETS];
+        ms = new uint32_t[LATENCY_MS_BUCKETS];
+        sec = new uint32_t[LATENCY_SEC_BUCKETS];
+        memcpy(us, other.us, sizeof(uint32_t) * LATENCY_US_BUCKETS);
+        memcpy(ms, other.ms, sizeof(uint32_t) * LATENCY_MS_BUCKETS);
+        memcpy(sec, other.sec, sizeof(uint32_t) * LATENCY_SEC_BUCKETS);
+    }
+}
+
+Track::~Track() {
+    // The buckets are allocated with new[], so release them with delete[].
+    if (us != NULL) {
+        delete[] us;
+        delete[] ms;
+        delete[] sec;
+    }
+}
+
+// Accumulate the counters of "other" into this Track. When reset is true
+// the min/max latency of "other" are zeroed after being merged.
+void Track::add(Track &other, bool reset) {
+    ops += other.ops;
+    latency_ops += other.latency_ops;
+    latency += other.latency;
+
+    // Zero is the "nothing measured yet" value (it is what clear() and the
+    // reset below store), so it must not win against a real measurement.
+    if (other.min_latency != 0 &&
+      (min_latency == 0 || other.min_latency < min_latency))
+        min_latency = other.min_latency;
+    if (reset)
+        other.min_latency = 0;
+    max_latency = MAX(max_latency, other.max_latency);
+    if (reset)
+        other.max_latency = 0;
+
+    if (us != NULL && other.us != NULL) {
+        for (int i = 0; i < LATENCY_US_BUCKETS; i++)
+            us[i] += other.us[i];
+        for (int i = 0; i < LATENCY_MS_BUCKETS; i++)
+            ms[i] += other.ms[i];
+        for (int i = 0; i < LATENCY_SEC_BUCKETS; i++)
+            sec[i] += other.sec[i];
+    }
+}
+
+// Make this Track an exact copy of "other", allocating or freeing the
+// latency buckets so that both agree on whether latency is tracked.
+void Track::assign(const Track &other) {
+    ops = other.ops;
+    latency_ops = other.latency_ops;
+    latency = other.latency;
+    min_latency = other.min_latency;
+    max_latency = other.max_latency;
+
+    if (other.us == NULL && us != NULL) {
+        // Allocated with new[], so release with delete[].
+        delete[] us;
+        delete[] ms;
+        delete[] sec;
+        us = NULL;
+        ms = NULL;
+        sec = NULL;
+    }
+    else if (other.us != NULL && us == NULL) {
+        us = new uint32_t[LATENCY_US_BUCKETS];
+        ms = new uint32_t[LATENCY_MS_BUCKETS];
+        sec = new uint32_t[LATENCY_SEC_BUCKETS];
+    }
+    if (us != NULL) {
+        memcpy(us, other.us, sizeof(uint32_t) * LATENCY_US_BUCKETS);
+        memcpy(ms, other.ms, sizeof(uint32_t) * LATENCY_MS_BUCKETS);
+        memcpy(sec, other.sec, sizeof(uint32_t) * LATENCY_SEC_BUCKETS);
+    }
+}
+
+// Mean latency over the sampled operations, or 0 if none were sampled.
+uint64_t Track::average_latency() const {
+    if (latency_ops == 0)
+        return (0);
+    else
+        return (latency / latency_ops);
+}
+
+// Zero every counter and bucket; the buckets stay allocated.
+void Track::clear() {
+    ops = 0;
+    latency_ops = 0;
+    latency = 0;
+    min_latency = 0;
+    max_latency = 0;
+    if (us != NULL) {
+        memset(us, 0, sizeof(uint32_t) * LATENCY_US_BUCKETS);
+        memset(ms, 0, sizeof(uint32_t) * LATENCY_MS_BUCKETS);
+        memset(sec, 0, sizeof(uint32_t) * LATENCY_SEC_BUCKETS);
+    }
+}
+
+// Count one operation without a latency sample.
+void Track::incr() {
+    ops++;
+}
+
+// Count one operation together with its latency in microseconds. Latency
+// tracking must be enabled (the buckets must exist).
+void Track::incr_with_latency(uint64_t usecs) {
+    ASSERT(us != NULL);
+
+    ops++;
+    latency_ops++;
+    latency += usecs;
+    if (usecs > max_latency)
+        max_latency = (uint32_t)usecs;
+    // min_latency is 0 until the first sample (and after every clear), so
+    // a plain "usecs < min_latency" could never record anything.
+    if (min_latency == 0 || usecs < min_latency)
+        min_latency = (uint32_t)usecs;
+
+    // Update a latency bucket.
+    // First buckets: one per microsecond, below LATENCY_US_BUCKETS usecs.
+    if (usecs < LATENCY_US_BUCKETS)
+        us[usecs]++;
+
+    // Second buckets: one per millisecond, below LATENCY_MS_BUCKETS msecs.
+    else if (usecs < ms_to_us(LATENCY_MS_BUCKETS))
+        ms[us_to_ms(usecs)]++;
+
+    // Third buckets: one per second, below LATENCY_SEC_BUCKETS seconds.
+    else if (usecs < sec_to_us(LATENCY_SEC_BUCKETS))
+        sec[us_to_sec(usecs)]++;
+
+    // Anything longer accumulates in the biggest bucket.
+    else
+        sec[LATENCY_SEC_BUCKETS - 1]++;
+}
+
+// Subtract the counters and buckets of "other" from this Track.
+void Track::subtract(const Track &other) {
+    ops -= other.ops;
+    latency_ops -= other.latency_ops;
+    latency -= other.latency;
+
+    // There's no sensible thing to be done for min/max_latency.
+
+    if (us != NULL && other.us != NULL) {
+        for (int i = 0; i < LATENCY_US_BUCKETS; i++)
+            us[i] -= other.us[i];
+        for (int i = 0; i < LATENCY_MS_BUCKETS; i++)
+            ms[i] -= other.ms[i];
+        for (int i = 0; i < LATENCY_SEC_BUCKETS; i++)
+            sec[i] -= other.sec[i];
+    }
+}
+
+// If there are no entries in this Track, take them from
+// a previous Track. Used to smooth graphs. We don't worry
+// about latency buckets here.
+void Track::smooth(const Track &other) { + if (latency_ops == 0) { + ops = other.ops; + latency = other.latency; + latency_ops = other.latency_ops; + min_latency = other.min_latency; + max_latency = other.max_latency; + } +} + +void Track::track_latency(bool newval) { + if (newval) { + if (us == NULL) { + us = new uint32_t[LATENCY_US_BUCKETS]; + ms = new uint32_t[LATENCY_MS_BUCKETS]; + sec = new uint32_t[LATENCY_SEC_BUCKETS]; + memset(us, 0, sizeof(uint32_t) * LATENCY_US_BUCKETS); + memset(ms, 0, sizeof(uint32_t) * LATENCY_MS_BUCKETS); + memset(sec, 0, sizeof(uint32_t) * LATENCY_SEC_BUCKETS); + } + } else { + if (us != NULL) { + delete us; + delete ms; + delete sec; + us = NULL; + ms = NULL; + sec = NULL; + } + } +} + +void Track::_get_us(long *result) { + if (us != NULL) { + for (int i = 0; i < LATENCY_US_BUCKETS; i++) + result[i] = (long)us[i]; + } else + memset(result, 0, sizeof(long) * LATENCY_US_BUCKETS); +} +void Track::_get_ms(long *result) { + if (ms != NULL) { + for (int i = 0; i < LATENCY_MS_BUCKETS; i++) + result[i] = (long)ms[i]; + } else + memset(result, 0, sizeof(long) * LATENCY_MS_BUCKETS); +} +void Track::_get_sec(long *result) { + if (sec != NULL) { + for (int i = 0; i < LATENCY_SEC_BUCKETS; i++) + result[i] = (long)sec[i]; + } else + memset(result, 0, sizeof(long) * LATENCY_SEC_BUCKETS); +} + +Stats::Stats(bool latency) : insert(latency), not_found(latency), + read(latency), remove(latency), update(latency), truncate(latency) { +} + +Stats::Stats(const Stats &other) : insert(other.insert), + not_found(other.not_found), read(other.read), remove(other.remove), + update(other.update), truncate(other.truncate) { +} + +Stats::~Stats() {} + +void Stats::add(Stats &other, bool reset) { + insert.add(other.insert, reset); + not_found.add(other.not_found, reset); + read.add(other.read, reset); + remove.add(other.remove, reset); + update.add(other.update, reset); + truncate.add(other.truncate, reset); +} + +void Stats::assign(const Stats &other) { + 
insert.assign(other.insert); + not_found.assign(other.not_found); + read.assign(other.read); + remove.assign(other.remove); + update.assign(other.update); + truncate.assign(other.truncate); +} + +void Stats::clear() { + insert.clear(); + not_found.clear(); + read.clear(); + remove.clear(); + update.clear(); + truncate.clear(); +} + +void Stats::describe(std::ostream &os) const { + os << "Stats: reads " << read.ops; + if (not_found.ops > 0) { + os << " (" << not_found.ops << " not found)"; + } + os << ", inserts " << insert.ops; + os << ", updates " << update.ops; + os << ", truncates " << truncate.ops; + os << ", removes " << remove.ops; +} + +void Stats::final_report(std::ostream &os, timespec &totalsecs) const { + uint64_t ops = 0; + ops += read.ops; + ops += not_found.ops; + ops += insert.ops; + ops += update.ops; + ops += truncate.ops; + ops += remove.ops; + +#define FINAL_OUTPUT(os, field, singular, ops, totalsecs) \ + os << "Executed " << field << " " #singular " operations (" \ + << PCT(field, ops) << "%) " << OPS_PER_SEC(field, totalsecs) \ + << " ops/sec" << std::endl + + FINAL_OUTPUT(os, read.ops, read, ops, totalsecs); + FINAL_OUTPUT(os, not_found.ops, not found, ops, totalsecs); + FINAL_OUTPUT(os, insert.ops, insert, ops, totalsecs); + FINAL_OUTPUT(os, update.ops, update, ops, totalsecs); + FINAL_OUTPUT(os, truncate.ops, truncate, ops, totalsecs); + FINAL_OUTPUT(os, remove.ops, remove, ops, totalsecs); +} + +void Stats::report(std::ostream &os) const { + os << read.ops << " reads"; + if (not_found.ops > 0) { + os << " (" << not_found.ops << " not found)"; + } + os << ", " << insert.ops << " inserts, "; + os << update.ops << " updates, "; + os << truncate.ops << " truncates, "; + os << remove.ops << " removes"; +} + +void Stats::smooth(const Stats &other) { + insert.smooth(other.insert); + not_found.smooth(other.not_found); + read.smooth(other.read); + remove.smooth(other.remove); + update.smooth(other.update); + truncate.smooth(other.truncate); +} + 
+void Stats::subtract(const Stats &other) { + insert.subtract(other.insert); + not_found.subtract(other.not_found); + read.subtract(other.read); + remove.subtract(other.remove); + update.subtract(other.update); + truncate.subtract(other.truncate); +} + +void Stats::track_latency(bool latency) { + insert.track_latency(latency); + not_found.track_latency(latency); + read.track_latency(latency); + remove.track_latency(latency); + update.track_latency(latency); + truncate.track_latency(latency); +} + +TableOptions::TableOptions() : key_size(0), value_size(0), _options() { + _options.add_int("key_size", key_size, + "default size of the key, unless overridden by Key.size"); + _options.add_int("value_size", value_size, + "default size of the value, unless overridden by Value.size"); +} +TableOptions::TableOptions(const TableOptions &other) : + key_size(other.key_size), value_size(other.value_size), + _options(other._options) {} +TableOptions::~TableOptions() {} + +Table::Table() : options(), _uri(), _internal(new TableInternal()) { +} +Table::Table(const char *uri) : options(), _uri(uri), + _internal(new TableInternal()) { +} +Table::Table(const Table &other) : options(other.options), _uri(other._uri), + _internal(new TableInternal(*other._internal)) { +} +Table::~Table() { delete _internal; } +Table& Table::operator=(const Table &other) { + options = other.options; + _uri = other._uri; + *_internal = *other._internal; + return (*this); +} + +void Table::describe(std::ostream &os) const { + os << "Table: " << _uri; +} + +TableInternal::TableInternal() : _tint(0), _context_count(0) {} +TableInternal::TableInternal(const TableInternal &other) : _tint(other._tint), + _context_count(other._context_count) {} +TableInternal::~TableInternal() {} + +WorkloadOptions::WorkloadOptions() : max_latency(0), + report_file("workload.stat"), report_interval(0), + run_time(0), sample_interval(0), sample_rate(1), + _options() { + _options.add_int("max_latency", max_latency, + "prints 
warning if any latency measured exceeds this number of " + "milliseconds. Requires sample_interval to be configured."); + _options.add_int("report_interval", report_interval, + "output throughput information every interval seconds, 0 to disable"); + _options.add_string("report_file", report_file, + "file name for collecting run output, " + "including output from the report_interval option. " + "The file name is relative to the connection's home directory. " + "When set to the empty string, stdout is used."); + _options.add_int("run_time", run_time, "total workload seconds"); + _options.add_int("sample_interval", sample_interval, + "performance logging every interval seconds, 0 to disable"); + _options.add_int("sample_rate", sample_rate, + "how often the latency of operations is measured. 1 for every operation, " + "2 for every second operation, 3 for every third operation etc."); +} + +WorkloadOptions::WorkloadOptions(const WorkloadOptions &other) : + max_latency(other.max_latency), report_interval(other.report_interval), + run_time(other.run_time), sample_interval(other.sample_interval), + sample_rate(other.sample_rate), _options(other._options) {} +WorkloadOptions::~WorkloadOptions() {} + +Workload::Workload(Context *context, const ThreadListWrapper &tlw) : + options(), stats(), _context(context), _threads(tlw._threads) { + if (context == NULL) + THROW("Workload contructor requires a Context"); +} + +Workload::Workload(Context *context, const Thread &thread) : + options(), stats(), _context(context), _threads() { + if (context == NULL) + THROW("Workload contructor requires a Context"); + _threads.push_back(thread); +} + +Workload::Workload(const Workload &other) : + options(other.options), stats(other.stats), _context(other._context), + _threads(other._threads) {} +Workload::~Workload() {} + +Workload& Workload::operator=(const Workload &other) { + options = other.options; + stats.assign(other.stats); + *_context = *other._context; + _threads = other._threads; + 
return (*this); +} + +int Workload::run(WT_CONNECTION *conn) { + WorkloadRunner runner(this); + + return (runner.run(conn)); +} + +WorkloadRunner::WorkloadRunner(Workload *workload) : + _workload(workload), _trunners(workload->_threads.size()), + _report_out(&std::cout), _start() { + ts_clear(_start); +} +WorkloadRunner::~WorkloadRunner() {} + +int WorkloadRunner::run(WT_CONNECTION *conn) { + WT_DECL_RET; + WorkloadOptions *options = &_workload->options; + std::ofstream report_out; + + _wt_home = conn->get_home(conn); + if (options->sample_interval > 0 && options->sample_rate <= 0) + THROW("Workload.options.sample_rate must be positive"); + if (!options->report_file.empty()) { + open_report_file(report_out, options->report_file.c_str(), + "Workload.options.report_file"); + _report_out = &report_out; + } + WT_ERR(create_all(conn, _workload->_context)); + WT_ERR(open_all()); + WT_ERR(ThreadRunner::cross_check(_trunners)); + WT_ERR(run_all()); + err: + //TODO: (void)close_all(); + _report_out = &std::cout; + return (ret); +} + +int WorkloadRunner::open_all() { + for (size_t i = 0; i < _trunners.size(); i++) { + WT_RET(_trunners[i].open_all()); + } + return (0); +} + +void WorkloadRunner::open_report_file(std::ofstream &of, const char *filename, + const char *desc) { + std::stringstream sstm; + + if (!_wt_home.empty()) + sstm << _wt_home << "/"; + sstm << filename; + of.open(sstm.str().c_str(), std::fstream::app); + if (!of) + THROW_ERRNO(errno, desc << ": \"" << sstm.str() + << "\" could not be opened"); +} + +int WorkloadRunner::create_all(WT_CONNECTION *conn, Context *context) { + for (size_t i = 0; i < _trunners.size(); i++) { + ThreadRunner *runner = &_trunners[i]; + std::stringstream sstm; + Thread *thread = &_workload->_threads[i]; + if (thread->options.name.empty()) { + sstm << "thread" << i; + thread->options.name = sstm.str(); + } + runner->_thread = thread; + runner->_context = context; + runner->_icontext = context->_internal; + runner->_workload = 
_workload; + runner->_wrunner = this; + runner->_number = (uint32_t)i; + // TODO: recover from partial failure here + WT_RET(runner->create_all(conn)); + } + WT_RET(context->_internal->create_all()); + return (0); +} + +int WorkloadRunner::close_all() { + for (size_t i = 0; i < _trunners.size(); i++) + _trunners[i].close_all(); + + return (0); +} + +void WorkloadRunner::get_stats(Stats *result) { + for (size_t i = 0; i < _trunners.size(); i++) + result->add(_trunners[i]._stats); +} + +void WorkloadRunner::report(time_t interval, time_t totalsecs, + Stats *prev_totals) { + std::ostream &out = *_report_out; + Stats new_totals(prev_totals->track_latency()); + + get_stats(&new_totals); + Stats diff(new_totals); + diff.subtract(*prev_totals); + prev_totals->assign(new_totals); + diff.report(out); + out << " in " << interval << " secs (" + << totalsecs << " total secs)" << std::endl; +} + +void WorkloadRunner::final_report(timespec &totalsecs) { + std::ostream &out = *_report_out; + Stats *stats = &_workload->stats; + + stats->clear(); + stats->track_latency(_workload->options.sample_interval > 0); + + get_stats(stats); + stats->final_report(out, totalsecs); + out << "Run completed: " << totalsecs << " seconds" << std::endl; +} + +int WorkloadRunner::run_all() { + void *status; + std::vector thread_handles; + Stats counts(false); + WorkgenException *exception; + WorkloadOptions *options = &_workload->options; + Monitor monitor(*this); + std::ofstream monitor_out; + std::ostream &out = *_report_out; + WT_DECL_RET; + + for (size_t i = 0; i < _trunners.size(); i++) + _trunners[i].get_static_counts(counts); + out << "Starting workload: " << _trunners.size() << " threads, "; + counts.report(out); + out << std::endl; + + workgen_epoch(&_start); + timespec end = _start + options->run_time; + timespec next_report = _start + options->report_interval; + + // Start all threads + if (options->sample_interval > 0) { + open_report_file(monitor_out, "monitor", "monitor output file"); + 
monitor._out = &monitor_out; + + if ((ret = pthread_create(&monitor._handle, NULL, monitor_main, + &monitor)) != 0) { + std::cerr << "monitor thread failed err=" << ret << std::endl; + return (ret); + } + } + + for (size_t i = 0; i < _trunners.size(); i++) { + pthread_t thandle; + ThreadRunner *runner = &_trunners[i]; + runner->_stop = false; + runner->_repeat = (options->run_time != 0); + if ((ret = pthread_create(&thandle, NULL, thread_runner_main, + runner)) != 0) { + std::cerr << "pthread_create failed err=" << ret << std::endl; + std::cerr << "Stopping all threads." << std::endl; + for (size_t j = 0; j < thread_handles.size(); j++) { + _trunners[j]._stop = true; + (void)pthread_join(thread_handles[j], &status); + _trunners[j].close_all(); + } + return (ret); + } + thread_handles.push_back(thandle); + runner->_stats.clear(); + } + + // Let the test run, reporting as needed. + Stats curstats(false); + timespec now = _start; + while (now < end) { + timespec sleep_amt; + + sleep_amt = end - now; + if (next_report != 0) { + timespec next_diff = next_report - now; + if (next_diff < next_report) + sleep_amt = next_diff; + } + if (sleep_amt.tv_sec > 0) + sleep((unsigned int)sleep_amt.tv_sec); + else + usleep((useconds_t)((sleep_amt.tv_nsec + 999)/ 1000)); + + workgen_epoch(&now); + if (now >= next_report && now < end && options->report_interval != 0) { + report(options->report_interval, (now - _start).tv_sec, &curstats); + while (now >= next_report) + next_report += options->report_interval; + } + } + + // signal all threads to stop + if (options->run_time != 0) + for (size_t i = 0; i < _trunners.size(); i++) + _trunners[i]._stop = true; + if (options->sample_interval > 0) + monitor._stop = true; + + // wait for all threads + exception = NULL; + for (size_t i = 0; i < _trunners.size(); i++) { + WT_TRET(pthread_join(thread_handles[i], &status)); + if (_trunners[i]._errno != 0) + VERBOSE(_trunners[i], + "Thread " << i << " has errno " << _trunners[i]._errno); + 
WT_TRET(_trunners[i]._errno); + _trunners[i].close_all(); + if (exception == NULL && !_trunners[i]._exception._str.empty()) + exception = &_trunners[i]._exception; + } + if (options->sample_interval > 0) { + WT_TRET(pthread_join(monitor._handle, &status)); + if (monitor._errno != 0) + std::cerr << "Monitor thread has errno " << monitor._errno + << std::endl; + if (exception == NULL && !monitor._exception._str.empty()) + exception = &monitor._exception; + } + + // issue the final report + timespec finalsecs = now - _start; + final_report(finalsecs); + + if (ret != 0) + std::cerr << "run_all failed err=" << ret << std::endl; + (*_report_out) << std::endl; + if (exception != NULL) + throw *exception; + return (ret); +} + +}; diff --git a/bench/workgen/workgen.h b/bench/workgen/workgen.h new file mode 100644 index 00000000000..c1ae01ed5a4 --- /dev/null +++ b/bench/workgen/workgen.h @@ -0,0 +1,410 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include +#include +#include +#include + +namespace workgen { + +struct ContextInternal; +struct TableInternal; +struct Thread; +struct Transaction; + +#ifndef SWIG +struct OptionsList { + OptionsList(); + OptionsList(const OptionsList &other); + + void add_int(const char *name, int default_value, const char *desc); + void add_bool(const char *name, bool default_value, const char *desc); + void add_double(const char *name, double default_value, const char *desc); + void add_string(const char *name, const std::string &default_value, + const char *desc); + + std::string help() const; + std::string help_description(const char *option_name) const; + std::string help_type(const char *option_name) const; + +private: + void add_option(const char *name, const std::string typestr, + const char *desc); + typedef std::pair TypeDescPair; + std::map _option_map; +}; +#endif + +// These classes are all exposed to Python via SWIG. While they may contain +// data that is private to C++, such data must not prevent the objects from +// being shared. Tables, Keys, Values, Operations and Threads can be shared: a +// single Key object might appear in many operations; Operations may appear +// multiple times in a Thread or in different Threads; the same Thread may +// appear multiple times in a Workload list, etc. +// +// Certain kinds of state are allowed: A Table contains a unique pointer that +// is used within the internal part of the Context. Stats contain lots +// of state, but is made available after a Workload.run(). +// +// Python controls the lifetime of (nearly) all objects of these classes. +// The exception is Stat/Track objects, which are also created/used +// internally to calculate and show statistics during a run. 
+//
+struct Track {
+    // Threads maintain the total thread operation and total latency they've
+    // experienced.
+
+    uint64_t ops;                       // Total operations
+    uint64_t latency_ops;               // Total ops sampled for latency
+    uint64_t latency;                   // Total latency of the sampled ops
+
+    // Minimum/maximum latency, shared with the monitor thread, that is, the
+    // monitor thread clears it so it's recalculated again for each period.
+
+    uint32_t min_latency;               // Minimum latency (uS)
+    uint32_t max_latency;               // Maximum latency (uS)
+
+    // Latency buckets are only allocated when latency tracking is enabled.
+    Track(bool latency_tracking = false);
+    Track(const Track &other);
+    ~Track();
+
+    void add(Track&, bool reset = false);
+    void assign(const Track&);
+    uint64_t average_latency() const;
+    void clear();
+    void incr();
+    void incr_with_latency(uint64_t usecs);
+    void smooth(const Track&);
+    void subtract(const Track&);
+    void track_latency(bool);
+    // True when latency tracking is enabled, i.e. the buckets exist.
+    bool track_latency() const { return (us != NULL); }
+
+    // Copy the bucket counts into a caller-supplied array.
+    void _get_us(long *);
+    void _get_ms(long *);
+    void _get_sec(long *);
+
+private:
+    // Latency buckets. From python, accessed via methods us(), ms(), sec()
+    uint32_t *us;                       // < 1us ... 1000us
+    uint32_t *ms;                       // < 1ms ... 1000ms
+    uint32_t *sec;                      // < 1s 2s ... 100s
+
+    // Declared private so that copies must go through assign().
+    Track & operator=(const Track &other); // use explicit assign method
+};
+
+// One Track per kind of operation; every method forwards to the same-named
+// Track method on each member.
+struct Stats {
+    Track insert;
+    Track not_found;
+    Track read;
+    Track remove;
+    Track update;
+    Track truncate;
+
+    Stats(bool latency = false);
+    Stats(const Stats &other);
+    ~Stats();
+
+    void add(Stats&, bool reset = false);
+    void assign(const Stats&);
+    void clear();
+    void describe(std::ostream &os) const;
+#ifndef SWIG
+    void final_report(std::ostream &os, timespec &totalsecs) const;
+    void report(std::ostream &os) const;
+#endif
+    void smooth(const Stats&);
+    void subtract(const Stats&);
+    void track_latency(bool);
+    // Reports the setting of the insert Track.
+    bool track_latency() const { return (insert.track_latency()); }
+
+private:
+    // Declared private so that copies must go through assign().
+    Stats & operator=(const Stats &other); // use explicit assign method
+};
+
+// A Context tracks the current record number for each uri, used
+// for key generation.
+//
+struct Context {
+    bool _verbose;
+    ContextInternal *_internal;
+
+    Context();
+    ~Context();
+    void describe(std::ostream &os) const {
+        os << "Context: verbose " << (_verbose ? "true" : "false");
+    }
+
+#ifndef SWIG
+    Context& operator=(const Context &other);
+#endif
+};
+
+// To prevent silent errors, this class is set up in Python so that new
+// properties are prevented, only existing properties can be set.
+//
+struct TableOptions {
+    int key_size;
+    int value_size;
+
+    TableOptions();
+    TableOptions(const TableOptions &other);
+    ~TableOptions();
+
+    // Writes "key_size <n>, value_size <n>" to os.
+    void describe(std::ostream &os) const {
+        os << "key_size " << key_size;
+        os << ", value_size " << value_size;
+    }
+
+    // The help*() methods forward to the private OptionsList.
+    std::string help() const { return _options.help(); }
+    std::string help_description(const char *option_name) const {
+        return _options.help_description(option_name); }
+    std::string help_type(const char *option_name) const {
+        return _options.help_type(option_name); }
+
+private:
+    OptionsList _options;
+};
+
+struct Table {
+    TableOptions options;
+    std::string _uri;
+    TableInternal *_internal;
+
+    /* XXX select table from range */
+
+    Table();
+    Table(const char *tablename);
+    Table(const Table &other);
+    ~Table();
+
+    void describe(std::ostream &os) const;
+
+#ifndef SWIG
+    Table& operator=(const Table &other);
+#endif
+};
+
+struct Key {
+    typedef enum {
+        KEYGEN_AUTO, KEYGEN_APPEND, KEYGEN_PARETO, KEYGEN_UNIFORM } KeyType;
+    KeyType _keytype;
+    int _size;
+
+    /* XXX specify more about key distribution */
+    // The default key is KEYGEN_AUTO with size 0.
+    Key() : _keytype(KEYGEN_AUTO), _size(0) {}
+    Key(KeyType keytype, int size) : _keytype(keytype), _size(size) {}
+    Key(const Key &other) : _keytype(other._keytype), _size(other._size) {}
+    ~Key() {}
+
+    // Writes the key type (as its enum value) and the size to os.
+    void describe(std::ostream &os) const {
+        os << "Key: type " << _keytype << ", size " << _size; }
+};
+
+struct Value {
+    int _size;
+
+    /* XXX specify how value is calculated */
+    // The default value has size 0.
+    Value() : _size(0) {}
+    Value(int size) : _size(size) {}
+    Value(const Value &other) : _size(other._size) {}
+    ~Value() {}
+
+    void describe(std::ostream &os) const { os << "Value: size " << _size; }
+};
+
+struct Operation {
+    enum OpType {
+        OP_NONE, OP_INSERT, OP_REMOVE, OP_SEARCH, OP_UPDATE };
+    OpType _optype;
+
+    Table _table;
+    Key _key;
+    Value _value;
+    Transaction *_transaction;
+    // NOTE(review): the element type of this std::vector is missing here,
+    // probably lost when the patch text was extracted -- restore it from the
+    // upstream header before using this declaration.
+    std::vector *_group;
+    int _repeatgroup;
+
+    // The members and methods inside "#ifndef SWIG" are hidden from the
+    // generated Python wrapper.
+#ifndef SWIG
+    int _keysize;    // derived from Key._size and Table.options.key_size
+    int _valuesize;
+    uint64_t _keymax;
+    uint64_t _valuemax;
+#endif
+
+    Operation();
+    Operation(OpType optype, Table table, Key key, Value value);
+    Operation(OpType optype, Table table, Key key);
+    Operation(OpType optype, Table table);
+    Operation(const Operation &other);
+    ~Operation();
+
+    void describe(std::ostream &os) const;
+#ifndef SWIG
+    Operation& operator=(const Operation &other);
+    void get_static_counts(Stats &stats, int multiplier);
+    void kv_compute_max(bool);
+    void kv_gen(bool, uint64_t, char *) const;
+    void kv_size_buffer(bool iskey, size_t &size) const;
+    void size_check() const;
+#endif
+};
+
+// To prevent silent errors, this class is set up in Python so that new
+// properties are prevented, only existing properties can be set.
+//
+struct ThreadOptions {
+    std::string name;
+    double throttle;
+    double throttle_burst;
+
+    ThreadOptions();
+    ThreadOptions(const ThreadOptions &other);
+    ~ThreadOptions();
+
+    // Writes only the throttle value; name and throttle_burst are not shown.
+    void describe(std::ostream &os) const {
+        os << "throttle " << throttle;
+    }
+
+    // The help*() methods forward to the private OptionsList.
+    std::string help() const { return _options.help(); }
+    std::string help_description(const char *option_name) const {
+        return _options.help_description(option_name); }
+    std::string help_type(const char *option_name) const {
+        return _options.help_type(option_name); }
+
+private:
+    OptionsList _options;
+};
+
+// This is a list of threads, which may be used in the Workload constructor.
+// It participates with ThreadList defined on the SWIG/Python side and
+// some Python operators added to Thread to allow Threads to be easily
+// composed using '+' and multiplied (by integer counts) using '*'.
+// Users of the workgen API in Python don't ever need to use
+// ThreadListWrapper or ThreadList.
+struct ThreadListWrapper {
+    // NOTE(review): the element type of std::vector is missing here and in
+    // the constructor below, probably lost when the patch text was
+    // extracted -- restore it from the upstream header.
+    std::vector _threads;
+
+    ThreadListWrapper() : _threads() {}
+    ThreadListWrapper(const ThreadListWrapper &other) :
+        _threads(other._threads) {}
+    ThreadListWrapper(const std::vector &threads) : _threads(threads) {}
+    void extend(const ThreadListWrapper &);
+    void append(const Thread &);
+    void multiply(const int);
+};
+
+// A Thread pairs its options with a single Operation.
+struct Thread {
+    ThreadOptions options;
+    Operation _op;
+
+    Thread();
+    Thread(const Operation &op);
+    Thread(const Thread &other);
+    ~Thread();
+
+    void describe(std::ostream &os) const;
+};
+
+struct Transaction {
+    bool _rollback;
+    std::string _begin_config;
+    std::string _commit_config;
+
+    // A NULL _config gives an empty begin configuration; _rollback starts
+    // false and the commit configuration starts empty.
+    Transaction(const char *_config = NULL) : _rollback(false),
+        _begin_config(_config == NULL ? "" : _config), _commit_config() {}
+
+    // Writes the begin configuration, preceded by "(rollback) " when
+    // _rollback is set and followed by the commit configuration when it is
+    // not empty.
+    void describe(std::ostream &os) const {
+        os << "Transaction: ";
+        if (_rollback)
+            os << "(rollback) ";
+        os << "begin_config: " << _begin_config;
+        if (!_commit_config.empty())
+            os << ", commit_config: " << _commit_config;
+    }
+};
+
+// To prevent silent errors, this class is set up in Python so that new
+// properties are prevented, only existing properties can be set.
+// +struct WorkloadOptions { + int max_latency; + std::string report_file; + int report_interval; + int run_time; + int sample_interval; + int sample_rate; + + WorkloadOptions(); + WorkloadOptions(const WorkloadOptions &other); + ~WorkloadOptions(); + + void describe(std::ostream &os) const { + os << "run_time " << run_time; + os << ", report_interval " << report_interval; + } + + std::string help() const { return _options.help(); } + std::string help_description(const char *option_name) const { + return _options.help_description(option_name); } + std::string help_type(const char *option_name) const { + return _options.help_type(option_name); } + +private: + OptionsList _options; +}; + +struct Workload { + WorkloadOptions options; + Stats stats; + Context *_context; + std::vector _threads; + + Workload(Context *context, const ThreadListWrapper &threadlist); + Workload(Context *context, const Thread &thread); + Workload(const Workload &other); + ~Workload(); + +#ifndef SWIG + Workload& operator=(const Workload &other); +#endif + + void describe(std::ostream &os) const { + os << "Workload: "; + _context->describe(os); + os << ", "; + options.describe(os); + os << ", [" << std::endl; + for (std::vector::const_iterator i = _threads.begin(); i != _threads.end(); i++) { + os << " "; i->describe(os); os << std::endl; + } + os << "]"; + } + int run(WT_CONNECTION *conn); +}; + +}; diff --git a/bench/workgen/workgen.swig b/bench/workgen/workgen.swig new file mode 100644 index 00000000000..0f74942169c --- /dev/null +++ b/bench/workgen/workgen.swig @@ -0,0 +1,233 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. 
+ * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * workgen.swig + * The SWIG interface file defining the workgen python API. + */ + +%include "typemaps.i" +%include "std_vector.i" +%include "std_string.i" +%include "stdint.i" +%include "attribute.i" +%include "carrays.i" + +/* We only need to reference WiredTiger types. */ +%import "wiredtiger.h" + +%{ +#include +#include +#include +#include "wiredtiger.h" +#include "workgen.h" +#include "workgen_int.h" +%} + +%pythoncode %{ +import numbers +%} + +%exception { + try { + $action + } + catch (workgen::WorkgenException &wge) { + SWIG_exception_fail(SWIG_RuntimeError, wge._str.c_str()); + } +} + +/* + * Some functions are long running, turn off signal handling that was enabled + * by the Python interpreter. This means that a signal handler coded in Python + * won't work when spanning a call to one of these long running functions, but + * it's doubtful our test scripts need signals at all. This could be made to + * work, it's just not worth the trouble. 
+ */
+/*
+ * InterruptableFunction(funcname) installs SIG_DFL for SIGINT around the
+ * wrapped call.  The previous handler is saved before the try block so that
+ * it is put back on both the normal and the exception path; otherwise a
+ * WorkgenException would leave SIGINT at SIG_DFL after the call returns.
+ */
+%define InterruptableFunction(funcname)
+%exception funcname {
+    void (*savesig)(int) = signal(SIGINT, SIG_DFL);
+    try {
+        $action
+        (void)signal(SIGINT, savesig);
+    }
+    catch (workgen::WorkgenException &wge) {
+        (void)signal(SIGINT, savesig);
+        SWIG_exception_fail(SWIG_RuntimeError, wge._str.c_str());
+    }
+}
+%enddef
+
+/*
+ * Define a __str__ function for all public workgen classes.
+ * The string is whatever the class's describe() method writes.
+ */
+%define WorkgenClass(classname)
+%extend workgen::classname {
+    const std::string __str__() {
+        std::ostringstream out;
+        $self->describe(out);
+        return out.str();
+    }
+};
+%enddef
+
+/*
+ * To forestall errors, make it impossible to add new attributes to certain
+ * classes.  This trick relies on the implementation of SWIG providing
+ * predictably named functions in the _workgen namespace to set attributes.
+ */
+%define WorkgenFrozenClass(classname)
+%extend workgen::classname {
+%pythoncode %{
+    def __setattr__(self, attr, val):
+        if getattr(self, attr) == None:
+            raise AttributeError("'" + #classname +
+              "' object has no attribute '" + attr + "'")
+        f = _workgen.__dict__[#classname + '_' + attr + '_set']
+        f(self, val)
+%}
+};
+%enddef
+
+InterruptableFunction(workgen::execute)
+InterruptableFunction(workgen::Workload::run)
+
+%module workgen
+/* Parse the header to generate wrappers.
*/ +%include "workgen.h" + +%template(OpList) std::vector; +%template(ThreadList) std::vector; +%array_class(uint32_t, uint32Array); +%array_class(long, longArray); + +WorkgenClass(Key) +WorkgenClass(Operation) +WorkgenClass(Stats) +WorkgenClass(Table) +WorkgenClass(TableOptions) +WorkgenClass(Thread) +WorkgenClass(ThreadOptions) +WorkgenClass(Transaction) +WorkgenClass(Value) +WorkgenClass(Workload) +WorkgenClass(WorkloadOptions) +WorkgenClass(Context) + +WorkgenFrozenClass(TableOptions) +WorkgenFrozenClass(ThreadOptions) +WorkgenFrozenClass(WorkloadOptions) + +%extend workgen::Operation { +%pythoncode %{ + def __mul__(self, other): + if not isinstance(other, numbers.Integral): + raise Exception('Operation.__mul__ requires an integral number') + op = Operation() + op._group = OpList([self]) + op._repeatgroup = other + return op + + __rmul__ = __mul__ + + def __add__(self, other): + if not isinstance(other, Operation): + raise Exception('Operation.__sum__ requires an Operation') + if self._group == None or self._repeatgroup != 1 or self._transaction != None: + op = Operation() + op._group = OpList([self, other]) + op._repeatgroup = 1 + return op + else: + self._group.append(other) + return self +%} +}; + +%extend workgen::Thread { +%pythoncode %{ + def __mul__(self, other): + if not isinstance(other, numbers.Integral): + raise Exception('Thread.__mul__ requires an integral number') + return ThreadListWrapper(ThreadList([self] * other)) + + __rmul__ = __mul__ + + def __add__(self, other): + if type(self) != type(other): + raise Exception('Thread.__sum__ requires an Thread') + return ThreadListWrapper(ThreadList([self, other])) +%} +}; + +%extend workgen::ThreadListWrapper { +%pythoncode %{ + def __mul__(self, other): + if not isinstance(other, numbers.Integral): + raise Exception('ThreadList.__mul__ requires an integral number') + tlw = ThreadListWrapper(self) + tlw.multiply(other) + return tlw + + __rmul__ = __mul__ + + def __add__(self, other): + tlw = 
ThreadListWrapper(self) + if isinstance(other, ThreadListWrapper): + tlw.extend(other) + elif isinstance(other, Thread): + tlw.append(other) + else: + raise Exception('ThreadList.__sum__ requires an Thread or ThreadList') + return tlw +%} +}; + +%extend workgen::Track { +%pythoncode %{ + def __longarray(self, size): + result = longArray(size) + result.__len__ = lambda: size + return result + + def us(self): + result = self.__longarray(1000) + self._get_us(result) + return result + + def ms(self): + result = self.__longarray(1000) + self._get_ms(result) + return result + + def sec(self): + result = self.__longarray(100) + self._get_sec(result) + return result +%} +}; diff --git a/bench/workgen/workgen/__init__.py b/bench/workgen/workgen/__init__.py new file mode 100644 index 00000000000..00e8f257546 --- /dev/null +++ b/bench/workgen/workgen/__init__.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# __init__.py
+#   initialization for workgen module
+#
+import os, sys
+
+# After importing the SWIG-generated file and workgen_util, copy all symbols
+# from both to this module so they will appear in the workgen namespace.
+me = sys.modules[__name__]
+sys.path.append(os.path.dirname(__file__))    # needed for Python3
+import workgen, workgen_util
+for module in workgen, workgen_util:    # a tuple: a module is not iterable
+    for name in dir(module):
+        value = getattr(module, name)
+        setattr(me, name, value)
diff --git a/bench/workgen/workgen_func.c b/bench/workgen/workgen_func.c
new file mode 100644
index 00000000000..2e1271a515e
--- /dev/null
+++ b/bench/workgen/workgen_func.c
@@ -0,0 +1,89 @@
+/*-
+ * Public Domain 2014-2017 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "wiredtiger.h" +#include "test_util.h" +#include "workgen_func.h" + +/* workgen_random_state is used as an opaque type handle. */ +typedef struct workgen_random_state { + WT_RAND_STATE state; +} workgen_random_state; + +/* + * These functions call their WiredTiger equivalents. + */ +uint32_t +workgen_atomic_add32(uint32_t *vp, uint32_t v) +{ + return (__wt_atomic_add32(vp, v)); +} + +uint64_t +workgen_atomic_add64(uint64_t *vp, uint64_t v) +{ + return (__wt_atomic_add64(vp, v)); +} + +void +workgen_epoch(struct timespec *tsp) +{ + __wt_epoch(NULL, tsp); +} + +uint32_t +workgen_random(workgen_random_state volatile * rnd_state) +{ + return (__wt_random(&rnd_state->state)); +} + +int +workgen_random_alloc(WT_SESSION *session, workgen_random_state **rnd_state) +{ + workgen_random_state *state; + + state = malloc(sizeof(workgen_random_state)); + if (state == NULL) { + *rnd_state = NULL; + return (ENOMEM); + } + __wt_random_init_seed((WT_SESSION_IMPL *)session, &state->state); + *rnd_state = state; + return (0); +} + +void +workgen_random_free(workgen_random_state *rnd_state) +{ + free(rnd_state); +} + +extern void +workgen_u64_to_string_zf(uint64_t n, char *buf, size_t len) +{ + u64_to_string_zf(n, buf, len); +} diff --git a/bench/workgen/workgen_func.h b/bench/workgen/workgen_func.h new file mode 100644 index 00000000000..20ebf2632cc --- /dev/null +++ b/bench/workgen/workgen_func.h @@ -0,0 +1,44 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. 
+ * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +struct workgen_random_state; + +extern uint32_t +workgen_atomic_add32(uint32_t *vp, uint32_t v); +extern uint64_t +workgen_atomic_add64(uint64_t *vp, uint64_t v); +extern void +workgen_epoch(struct timespec *tsp); +extern uint32_t +workgen_random(struct workgen_random_state volatile *rnd_state); +extern int +workgen_random_alloc(WT_SESSION *session, + struct workgen_random_state **rnd_state); +extern void +workgen_random_free(struct workgen_random_state *rnd_state); +extern void +workgen_u64_to_string_zf(uint64_t n, char *buf, size_t len); diff --git a/bench/workgen/workgen_int.h b/bench/workgen/workgen_int.h new file mode 100644 index 00000000000..01fb727691b --- /dev/null +++ b/bench/workgen/workgen_int.h @@ -0,0 +1,205 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. 
+ * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#ifndef SWIG +extern "C" { +#include "workgen_func.h" +} +#endif + +namespace workgen { + +// A 'tint' or ('table integer') is a unique small value integer +// assigned to each table URI in use. Currently, we assign it once, +// and its value persists through the lifetime of the Context. +typedef uint32_t tint_t; + +struct ThreadRunner; +struct WorkloadRunner; + +// A exception generated by the workgen classes. Methods generally return an +// int errno, so this is useful primarily for notifying the caller about +// failures in constructors. 
+struct WorkgenException {
+    std::string _str;
+    WorkgenException() : _str() {}
+    // Builds _str as "<wiredtiger_strerror(err)>: <msg>".  The error text is
+    // left out when err is 0, the message when msg is NULL, and the ": "
+    // separator is only added when both parts are present.
+    WorkgenException(int err, const char *msg = NULL) : _str() {
+        if (err != 0)
+            _str += wiredtiger_strerror(err);
+        if (msg != NULL) {
+            if (!_str.empty())
+                _str += ": ";
+            _str += msg;
+        }
+    }
+    WorkgenException(const WorkgenException &other) : _str(other._str) {}
+    ~WorkgenException() {}
+};
+
+struct Throttle {
+    ThreadRunner &_runner;
+    double _throttle;
+    double _burst;
+    timespec _next_div;
+    int64_t _ops_delta;
+    uint64_t _ops_prev;                 // previously returned value
+    uint64_t _ops_per_div;              // statically calculated.
+    uint64_t _ms_per_div;               // statically calculated.
+    bool _started;
+
+    Throttle(ThreadRunner &runner, double throttle, double burst);
+    ~Throttle();
+
+    // Called with the number of operations since the last throttle.
+    // Sleeps for any needed amount and returns the number of operations the
+    // caller should perform before the next call to throttle.
+    // NOTE(review): the return type is int and the limit is passed back
+    // through op_limit; presumably the int is an error code -- confirm
+    // against the implementation.
+    int throttle(uint64_t op_count, uint64_t *op_limit);
+};
+
+// There is one of these per Thread object.  It exists for the duration of a
+// call to Workload::run() method.
+struct ThreadRunner { + int _errno; + WorkgenException _exception; + Thread *_thread; + Context *_context; + ContextInternal *_icontext; + Workload *_workload; + WorkloadRunner *_wrunner; + workgen_random_state *_rand_state; + Throttle *_throttle; + uint64_t _throttle_ops; + uint64_t _throttle_limit; + bool _in_transaction; + uint32_t _number; + Stats _stats; + + typedef enum { + USAGE_READ = 0x1, USAGE_WRITE = 0x2, USAGE_MIXED = 0x4 } Usage; + std::map _table_usage; // value is Usage + WT_CURSOR **_cursors; // indexed by tint_t + volatile bool _stop; + WT_SESSION *_session; + char *_keybuf; + char *_valuebuf; + bool _repeat; + + ThreadRunner(); + ~ThreadRunner(); + + void free_all(); + static int cross_check(std::vector &runners); + + int close_all(); + int create_all(WT_CONNECTION *conn); + void get_static_counts(Stats &); + int open_all(); + int run(); + + void op_create_all(Operation *, size_t &keysize, size_t &valuesize); + uint64_t op_get_key_recno(Operation *, tint_t tint); + void op_get_static_counts(Operation *, Stats &, int); + int op_run(Operation *); + +#ifdef _DEBUG + std::stringstream _debug_messages; + std::string get_debug(); +#define DEBUG_CAPTURE(runner, expr) runner._debug_messages << expr +#else +#define DEBUG_CAPTURE(runner, expr) +#endif +}; + +struct Monitor { + int _errno; + WorkgenException _exception; + WorkloadRunner &_wrunner; + volatile bool _stop; + pthread_t _handle; + std::ostream *_out; + + Monitor(WorkloadRunner &wrunner); + ~Monitor(); + int run(); +}; + +struct ContextInternal { + std::map _tint; // maps uri -> tint_t + std::map _table_names; // reverse mapping + uint64_t *_recno; // # entries per tint_t + uint32_t _recno_alloced; // length of allocated _recno + tint_t _tint_last; // last tint allocated + // unique id per context, to work with multiple contexts, starts at 1. 
+ uint32_t _context_count; + + ContextInternal(); + ~ContextInternal(); + int create_all(); +}; + +struct TableInternal { + tint_t _tint; + uint32_t _context_count; + + TableInternal(); + TableInternal(const TableInternal &other); + ~TableInternal(); +}; + +// An instance of this class only exists for the duration of one call to a +// Workload::run() method. +struct WorkloadRunner { + Workload *_workload; + std::vector _trunners; + std::ostream *_report_out; + std::string _wt_home; + timespec _start; + + WorkloadRunner(Workload *); + ~WorkloadRunner(); + int run(WT_CONNECTION *conn); + +private: + int close_all(); + int create_all(WT_CONNECTION *conn, Context *context); + void final_report(timespec &); + void get_stats(Stats *stats); + int open_all(); + void open_report_file(std::ofstream &, const char *, const char *); + void report(time_t, time_t, Stats *stats); + int run_all(); + + WorkloadRunner(const WorkloadRunner &); // disallowed + WorkloadRunner& operator=(const WorkloadRunner &other); // disallowed +}; + +}; diff --git a/bench/workgen/workgen_time.h b/bench/workgen/workgen_time.h new file mode 100644 index 00000000000..f33eb64d9c9 --- /dev/null +++ b/bench/workgen/workgen_time.h @@ -0,0 +1,201 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. 
We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#define THOUSAND (1000ULL) +#define MILLION (1000000ULL) +#define BILLION (1000000000ULL) + +#define NSEC_PER_SEC BILLION +#define USEC_PER_SEC MILLION +#define MSEC_PER_SEC THOUSAND + +#define ns_to_ms(v) ((v) / MILLION) +#define ns_to_sec(v) ((v) / BILLION) +#define ns_to_us(v) ((v) / THOUSAND) + +#define us_to_ms(v) ((v) / THOUSAND) +#define us_to_ns(v) ((v) * THOUSAND) +#define us_to_sec(v) ((v) / MILLION) + +#define ms_to_ns(v) ((v) * MILLION) +#define ms_to_us(v) ((v) * THOUSAND) +#define ms_to_sec(v) ((v) / THOUSAND) + +#define sec_to_ns(v) ((v) * BILLION) +#define sec_to_us(v) ((v) * MILLION) +#define sec_to_ms(v) ((v) * THOUSAND) + +inline std::ostream& +operator<<(std::ostream &os, const timespec &ts) +{ + char oldfill; + std::streamsize oldwidth; + + os << ts.tv_sec << "."; + oldfill = os.fill('0'); + oldwidth = os.width(3); + os << (int)ns_to_ms(ts.tv_nsec); + os.fill(oldfill); + os.width(oldwidth); + return (os); +} + +inline timespec +operator-(const timespec &lhs, const timespec &rhs) +{ + timespec ts; + + if (lhs.tv_nsec < rhs.tv_nsec) { + ts.tv_sec = lhs.tv_sec - rhs.tv_sec - 1; + ts.tv_nsec = lhs.tv_nsec - rhs.tv_nsec + NSEC_PER_SEC; + } else { + ts.tv_sec = lhs.tv_sec - rhs.tv_sec; + ts.tv_nsec = lhs.tv_nsec - rhs.tv_nsec; + } + return (ts); +} + +inline timespec +operator+(const timespec &lhs, const int n) +{ + timespec ts 
= lhs;
+    ts.tv_sec += n;
+    return (ts);
+}
+
+inline bool
+operator<(const timespec &lhs, const timespec &rhs)
+{
+    if (lhs.tv_sec == rhs.tv_sec)
+        return (lhs.tv_nsec < rhs.tv_nsec);
+    else
+        return (lhs.tv_sec < rhs.tv_sec);
+}
+
+inline bool
+operator>(const timespec &lhs, const timespec &rhs)
+{
+    if (lhs.tv_sec == rhs.tv_sec)
+        return (lhs.tv_nsec > rhs.tv_nsec);
+    else
+        return (lhs.tv_sec > rhs.tv_sec);
+}
+
+inline bool
+operator>=(const timespec &lhs, const timespec &rhs)
+{
+    return (!(lhs < rhs));
+}
+
+inline bool
+operator<=(const timespec &lhs, const timespec &rhs)
+{
+    return (!(lhs > rhs));
+}
+
+// True when lhs is exactly n whole seconds (no nanosecond part).
+inline bool
+operator==(const timespec &lhs, int n)
+{
+    return (lhs.tv_sec == n && lhs.tv_nsec == 0);
+}
+
+inline bool
+operator!=(const timespec &lhs, int n)
+{
+    return (lhs.tv_sec != n || lhs.tv_nsec != 0);
+}
+
+// Advance lhs by n seconds.
+inline timespec &
+operator+=(timespec &lhs, const int n)
+{
+    lhs.tv_sec += n;
+    return (lhs);
+}
+
+inline bool
+operator==(const timespec &lhs, const timespec &rhs)
+{
+    return (lhs.tv_sec == rhs.tv_sec && lhs.tv_nsec == rhs.tv_nsec);
+}
+
+// Subtract rhs from lhs in place, borrowing one second when the nanosecond
+// difference goes negative.
+inline timespec &
+operator-=(timespec &lhs, const timespec &rhs)
+{
+    lhs.tv_sec -= rhs.tv_sec;
+    lhs.tv_nsec -= rhs.tv_nsec;
+    if (lhs.tv_nsec < 0) {
+        lhs.tv_nsec += NSEC_PER_SEC;
+        lhs.tv_sec -= 1;
+    }
+    return (lhs);
+}
+
+// Return lhs advanced by n milliseconds.  Whole seconds of n go to tv_sec,
+// the remainder to tv_nsec, and any overflow of tv_nsec is carried into
+// tv_sec so that the result has tv_nsec < NSEC_PER_SEC (assuming lhs itself
+// has a non-negative tv_nsec).
+inline timespec
+ts_add_ms(const timespec &lhs, const uint64_t n)
+{
+    timespec ts;
+
+    ts.tv_sec = lhs.tv_sec + ms_to_sec(n);
+    ts.tv_nsec = lhs.tv_nsec + ms_to_ns(n % THOUSAND);
+    // Use >=: a sum of exactly NSEC_PER_SEC must carry into tv_sec as well.
+    while ((unsigned long)ts.tv_nsec >= NSEC_PER_SEC) {
+        ts.tv_nsec -= NSEC_PER_SEC;
+        ts.tv_sec++;
+    }
+    return (ts);
+}
+
+inline void
+ts_assign(timespec &lhs, const timespec &rhs)
+{
+    lhs.tv_sec = rhs.tv_sec;
+    lhs.tv_nsec = rhs.tv_nsec;
+}
+
+inline void
+ts_clear(timespec &ts)
+{
+    ts.tv_sec = 0;
+    ts.tv_nsec = 0;
+}
+
+// Whole seconds in ts.
+inline uint64_t
+ts_sec(const timespec &ts)
+{
+    return (ns_to_sec(ts.tv_nsec) + ts.tv_sec);
+}
+
+inline uint64_t
+ts_ms(const timespec &ts)
+{
+    return (ns_to_ms(ts.tv_nsec) +
sec_to_ms(ts.tv_sec)); +} + +inline uint64_t +ts_us(const timespec &ts) +{ + return (ns_to_us(ts.tv_nsec) + sec_to_us(ts.tv_sec)); +} diff --git a/build_posix/Make.subdirs b/build_posix/Make.subdirs index 4ecec37ca6c..ec928a9ead2 100644 --- a/build_posix/Make.subdirs +++ b/build_posix/Make.subdirs @@ -1,10 +1,11 @@ # List of sub-directories, used by makemake to create Makefile.am # # The format is: -# [] +# [ ...] # # If the directory exists, it is added to AUTO_SUBDIRS. -# If a condition is included, the subdir is made conditional via AM_CONDITIONAL +# If condition(s) are included, the subdir is made conditional via +# AM_CONDITIONAL. All conditions must be true to include the directory. ext/collators/reverse ext/collators/revint ext/compressors/lz4 LZ4 @@ -45,4 +46,5 @@ test/syscall test/thread # Benchmark programs. +bench/workgen PYTHON HAVE_CXX bench/wtperf diff --git a/build_posix/configure.ac.in b/build_posix/configure.ac.in index 0fef587b4b8..68b4d84ba59 100644 --- a/build_posix/configure.ac.in +++ b/build_posix/configure.ac.in @@ -24,6 +24,7 @@ AM_PROG_AS(as gas) define([AC_LIBTOOL_LANG_CXX_CONFIG], [:])dnl define([AC_LIBTOOL_LANG_F77_CONFIG], [:])dnl +AM_CONDITIONAL([HAVE_CXX], [test "$CXX" != ""]) LT_PREREQ(2.2.6) LT_INIT([pic-only]) diff --git a/build_posix/makemake b/build_posix/makemake index 506420b4aaf..73d6b6bcfb1 100755 --- a/build_posix/makemake +++ b/build_posix/makemake @@ -7,14 +7,24 @@ (sed -n '1,/BEGIN SUBDIRS/p' Make.base echo "SUBDIRS =" -sed -e 's/#.*$//' -e '/^$/d' Make.subdirs | while read dir cond ; do +sed -e 's/#.*$//' -e '/^$/d' Make.subdirs | while read dir conds ; do test -d ../$dir || continue - if test -n "$cond" ; then - cat < $t -- cgit v1.2.1 From 35d41e49f6a95804866ac8a791b267d005a3027d Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 16 May 2017 00:26:57 -0400 Subject: WT-3142 Add a workload generator application (#3420) Fix up some copyright notices. 
--- bench/workgen/runner/example_simple.py | 30 +++++++++++++++++++++++- bench/workgen/runner/example_txn.py | 30 +++++++++++++++++++++++- bench/workgen/runner/insert_test.py | 30 +++++++++++++++++++++++- bench/workgen/runner/multi_btree_heavy_stress.py | 30 +++++++++++++++++++++++- bench/workgen/runner/runner/latency.py | 2 +- bench/workgen/runner/small_btree.py | 30 +++++++++++++++++++++++- bench/workgen/workgen/__init__.py | 2 +- dist/s_copyright.list | 4 ++++ 8 files changed, 151 insertions(+), 7 deletions(-) diff --git a/bench/workgen/runner/example_simple.py b/bench/workgen/runner/example_simple.py index de944cbe29e..626f7ca64a5 100755 --- a/bench/workgen/runner/example_simple.py +++ b/bench/workgen/runner/example_simple.py @@ -1,4 +1,32 @@ -#!/usr/bin/python +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + from runner import * from wiredtiger import * from workgen import * diff --git a/bench/workgen/runner/example_txn.py b/bench/workgen/runner/example_txn.py index ef1d7a93941..1b22dc10aba 100644 --- a/bench/workgen/runner/example_txn.py +++ b/bench/workgen/runner/example_txn.py @@ -1,4 +1,32 @@ -#!/usr/bin/python +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# + from runner import * from wiredtiger import * from workgen import * diff --git a/bench/workgen/runner/insert_test.py b/bench/workgen/runner/insert_test.py index 30f2818e91e..8380c6ba3eb 100644 --- a/bench/workgen/runner/insert_test.py +++ b/bench/workgen/runner/insert_test.py @@ -1,4 +1,32 @@ -#!/usr/bin/python +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# + from runner import * from wiredtiger import * from workgen import * diff --git a/bench/workgen/runner/multi_btree_heavy_stress.py b/bench/workgen/runner/multi_btree_heavy_stress.py index 0993f60248d..94dacfc4311 100644 --- a/bench/workgen/runner/multi_btree_heavy_stress.py +++ b/bench/workgen/runner/multi_btree_heavy_stress.py @@ -1,4 +1,32 @@ -#!/usr/bin/python +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + # Drive a constant high workload through, even if WiredTiger isn't keeping # up by dividing the workload across a lot of threads. 
This needs to be # tuned to the particular machine so the workload is close to capacity in the diff --git a/bench/workgen/runner/runner/latency.py b/bench/workgen/runner/runner/latency.py index 46d9be9bad8..8eaa10693a9 100644 --- a/bench/workgen/runner/runner/latency.py +++ b/bench/workgen/runner/runner/latency.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/bench/workgen/runner/small_btree.py b/bench/workgen/runner/small_btree.py index d70f0d9e693..2bcc0188f30 100644 --- a/bench/workgen/runner/small_btree.py +++ b/bench/workgen/runner/small_btree.py @@ -1,4 +1,32 @@ -#!/usr/bin/python +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + from runner import * from wiredtiger import * from workgen import * diff --git a/bench/workgen/workgen/__init__.py b/bench/workgen/workgen/__init__.py index 00e8f257546..ff665bf9398 100644 --- a/bench/workgen/workgen/__init__.py +++ b/bench/workgen/workgen/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Public Domain 2014-2016 MongoDB, Inc. +# Public Domain 2014-2017 MongoDB, Inc. # Public Domain 2008-2014 WiredTiger, Inc. # # This is free and unencumbered software released into the public domain. diff --git a/dist/s_copyright.list b/dist/s_copyright.list index 2db76d9f3c5..ba5e7c6ff3e 100644 --- a/dist/s_copyright.list +++ b/dist/s_copyright.list @@ -1,4 +1,8 @@ +skip api/leveldb/leveldb_wt_config.h skip api/leveldb/leveldb_wt_config.in +skip bench/workgen/workgen.py +skip bench/workgen/workgen/workgen.py +skip bench/workgen/workgen_wrap.cxx skip build_win/wiredtiger_config.h skip dist/api_config.py skip dist/api_data.py -- cgit v1.2.1 From 34b599ea3c994498b53ec1c9deb62914b9798b0b Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Tue, 16 May 2017 23:31:23 +1000 Subject: WT-3142 Check if C++ works in autoconf, not just if a variable is set. (#3421) --- build_posix/configure.ac.in | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/build_posix/configure.ac.in b/build_posix/configure.ac.in index 68b4d84ba59..4de12d5161e 100644 --- a/build_posix/configure.ac.in +++ b/build_posix/configure.ac.in @@ -24,7 +24,16 @@ AM_PROG_AS(as gas) define([AC_LIBTOOL_LANG_CXX_CONFIG], [:])dnl define([AC_LIBTOOL_LANG_F77_CONFIG], [:])dnl -AM_CONDITIONAL([HAVE_CXX], [test "$CXX" != ""]) + +# Check whether the C++ compiler works by linking a trivial program. 
+AC_CACHE_CHECK([whether the C++ compiler works], + [wt_cv_prog_cxx_works], + [AC_LANG_PUSH([C++]) + AC_LINK_IFELSE([AC_LANG_PROGRAM([], [])], + [wt_cv_prog_cxx_works=yes], + [wt_cv_prog_cxx_works=no]) + AC_LANG_POP([C++])]) +AM_CONDITIONAL([HAVE_CXX], [test "$wt_cv_prog_cxx_works" = "yes"]) LT_PREREQ(2.2.6) LT_INIT([pic-only]) -- cgit v1.2.1 From e3c1f0012e3eac806a7b8e64cb4dee90aeab8eda Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 16 May 2017 12:02:51 -0400 Subject: WT-3314 clarify WT_PANIC error handling (#3418) A correctly-written WiredTiger application will have an error handler for WT_PANIC, and that isn't explicit in the documentation. --- dist/api_err.py | 8 ++++---- examples/c/ex_event_handler.c | 4 ++++ src/docs/error-handling.dox | 26 +++++++++++++++++++++----- src/include/wiredtiger.in | 7 +++---- 4 files changed, 32 insertions(+), 13 deletions(-) diff --git a/dist/api_err.py b/dist/api_err.py index 06887476dbc..bfa4459d438 100644 --- a/dist/api_err.py +++ b/dist/api_err.py @@ -41,10 +41,10 @@ errors = [ WT_CURSOR::update or WT_CURSOR::remove.'''), Error('WT_PANIC', -31804, 'WiredTiger library panic', ''' - This error indicates an underlying problem that requires the - application exit and restart. The application can exit - immediately when \c WT_PANIC is returned from a WiredTiger - interface, no further WiredTiger calls are required.'''), + This error indicates an underlying problem that requires a database + restart. 
The application may exit immediately, no further WiredTiger + calls are required (and further calls will themselves immediately + fail).'''), Error('WT_RESTART', -31805, 'restart the operation (internal)', undoc=True), Error('WT_RUN_RECOVERY', -31806, diff --git a/examples/c/ex_event_handler.c b/examples/c/ex_event_handler.c index 153b44c35bf..acd9d9beecc 100644 --- a/examples/c/ex_event_handler.c +++ b/examples/c/ex_event_handler.c @@ -70,6 +70,10 @@ handle_wiredtiger_error(WT_EVENT_HANDLER *handler, "app_id %s, thread context %p, error %d, message %s\n", custom_handler->app_id, (void *)session, error, message); + /* Exit if the database has a fatal error. */ + if (error == WT_PANIC) + exit (1); + return (0); } diff --git a/src/docs/error-handling.dox b/src/docs/error-handling.dox index 62be498fc15..eb9ca6bb82a 100644 --- a/src/docs/error-handling.dox +++ b/src/docs/error-handling.dox @@ -17,13 +17,18 @@ are thrown as \c WiredTigerException, which may be caught by the application. The \c WiredTigerRollbackException is a specific type of \c WiredTigerException, -it is thrown when there is a conflict between concurrent operations. +thrown when there is a conflict between concurrent operations. An application that catches this exception should call rollback() on the relevant transaction, and retry as necessary. The \c WiredTigerPanicException is a specific type of \c WiredTigerException, -it is thrown when there is an underlying problem that requires the -application to exit and restart. +thrown when there is a fatal error requiring database restart. Applications +will normally handle \c WiredTigerPanicException as a special case. A +correctly-written WiredTiger application will likely catch +\c WiredTigerPanicException and immediately exit or otherwise handle fatal +errors. Note that no further WiredTiger calls are required after +\c WiredTigerPanicException is caught (and further calls will themselves +immediately fail). 
The following is a complete list of possible WiredTiger-specific return values, all constants defined in the com.wiredtiger.db.wiredtiger class: @@ -47,7 +52,7 @@ This error is returned when an error is not covered by a specific error return. This error indicates an operation did not find a value to return. This includes cursor search and other operations where no record matched the cursor's search key such as WT_CURSOR::update or WT_CURSOR::remove. @par WT_PANIC -This error indicates an underlying problem that requires the application exit and restart. The application can exit immediately when \c WT_PANIC is returned from a WiredTiger interface, no further WiredTiger calls are required. +This error indicates an underlying problem that requires a database restart. The application may exit immediately, no further WiredTiger calls are required (and further calls will themselves immediately fail). @par WT_RUN_RECOVERY This error is generated when wiredtiger_open is configured to return an error if recovery is required to use the database. @@ -73,7 +78,7 @@ Note that ::wiredtiger_strerror is not thread-safe. @m_if{c} @section error_handling_event Error handling using the WT_EVENT_HANDLER -More complex error handling can be configured by passing an implementation +Specific error handling can be configured by passing an implementation of WT_EVENT_HANDLER to ::wiredtiger_open or WT_CONNECTION::open_session. For example, both informational and error messages might be passed to an @@ -81,6 +86,17 @@ application-specific logging function that added a timestamp and logged the message to a file, and error messages might additionally be output to the \c stderr file stream. +Additionally, applications will normally handle \c WT_PANIC as a special +case. 
WiredTiger will always call the error handler callback with +\c WT_PANIC in the case of a fatal error requiring database restart, +however, WiredTiger cannot guarantee applications will see an application +thread return \c WT_PANIC from a WiredTiger API call. For this reason, a +correctly-written WiredTiger application will likely specify at least an +error handler which will immediately exit or otherwise handle fatal errors. +Note that no further WiredTiger calls are required after an error handler +is called with \c WT_PANIC (and further calls will themselves immediately +fail). + @snippet ex_event_handler.c Function event_handler @snippet ex_event_handler.c Configure event_handler diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index e38c41baccd..848448a453d 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -3143,10 +3143,9 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp) #define WT_NOTFOUND (-31803) /*! * WiredTiger library panic. - * This error indicates an underlying problem that requires the application exit - * and restart. The application can exit immediately when \c WT_PANIC is - * returned from a WiredTiger interface, no further WiredTiger calls are - * required. + * This error indicates an underlying problem that requires a database restart. + * The application may exit immediately, no further WiredTiger calls are + * required (and further calls will themselves immediately fail). */ #define WT_PANIC (-31804) /*! 
@cond internal */ -- cgit v1.2.1 From 216e12b6da26182ffc666d2958eba2ee7d5a715c Mon Sep 17 00:00:00 2001 From: David Hows Date: Wed, 17 May 2017 13:55:05 +1000 Subject: WT-3313 Replace calls to the deprecated LZ4_compress function (#3423) --- ext/compressors/lz4/lz4_compress.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ext/compressors/lz4/lz4_compress.c b/ext/compressors/lz4/lz4_compress.c index e99d125b69f..279f0be6c36 100644 --- a/ext/compressors/lz4/lz4_compress.c +++ b/ext/compressors/lz4/lz4_compress.c @@ -54,8 +54,8 @@ typedef struct { /* * LZ4 decompression requires the exact compressed byte count returned by the - * LZ4_compress and LZ4_compress_destSize functions. WiredTiger doesn't track - * that value, store it in the destination buffer. + * LZ4_compress_default and LZ4_compress_destSize functions. WiredTiger doesn't + * track that value, store it in the destination buffer. * * Additionally, LZ4_compress_destSize may compress into the middle of a record, * and after decompression we return the length to the last record successfully @@ -137,11 +137,10 @@ lz4_compress(WT_COMPRESSOR *compressor, WT_SESSION *session, (void)compressor; /* Unused parameters */ (void)session; - (void)dst_len; /* Compress, starting after the prefix bytes. */ - lz4_len = LZ4_compress( - (const char *)src, (char *)dst + sizeof(LZ4_PREFIX), (int)src_len); + lz4_len = LZ4_compress_default((const char *)src, + (char *)dst + sizeof(LZ4_PREFIX), (int)src_len, (int)dst_len); /* * If compression succeeded and the compressed length is smaller than -- cgit v1.2.1 From 5c4e0e9ee90b343e9878dd8f6cab1365fc0dce97 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 19 May 2017 01:07:03 -0400 Subject: WT-3303 Deadlock during first access to lookaside table (#3426) Don't acquire the eviction walk-lock when releasing exclusive eviction access to a file, it can deadlock. Instead, atomically decrement the counter. 
--- dist/s_string.ok | 1 + src/evict/evict_lru.c | 28 ++++++++++++++++++++-------- src/include/btree.h | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/dist/s_string.ok b/dist/s_string.ok index 7c409e0e46d..ac21c61a8ef 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -811,6 +811,7 @@ intl intnum intpack intptr +intr intrin inuse io diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 48d1ae95547..97b96788831 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -906,10 +906,8 @@ void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) { WT_BTREE *btree; - WT_CACHE *cache; btree = S2BT(session); - cache = S2C(session)->cache; /* * We have seen subtle bugs with multiple threads racing to turn @@ -917,12 +915,26 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) */ WT_DIAGNOSTIC_YIELD; - /* Hold the walk lock to turn on eviction. */ - __wt_spin_lock(session, &cache->evict_walk_lock); - WT_ASSERT(session, - btree->evict_ref == NULL && btree->evict_disabled > 0); - --btree->evict_disabled; - __wt_spin_unlock(session, &cache->evict_walk_lock); + /* + * Atomically decrement the evict-disabled count, without acquiring the + * eviction walk-lock. We can't acquire that lock here because there's + * a potential deadlock. When acquiring exclusive eviction access, we + * acquire the eviction walk-lock and then the cache's pass-intr lock. + * The current eviction implementation can hold the pass-intr lock and + * call into this function (see WT-3303 for the details), which might + * deadlock with another thread trying to get exclusive eviction access. 
+ */ +#if defined(HAVE_DIAGNOSTIC) + { + int32_t v; + + WT_ASSERT(session, btree->evict_ref == NULL); + v = __wt_atomic_subi32(&btree->evict_disabled, 1); + WT_ASSERT(session, v >= 0); + } +#else + (void)__wt_atomic_subi32(&btree->evict_disabled, 1); +#endif } #define EVICT_TUNE_BATCH 1 /* Max workers to add each period */ diff --git a/src/include/btree.h b/src/include/btree.h index 74c7871034e..95af9e154f8 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -150,7 +150,7 @@ struct __wt_btree { u_int evict_walk_period; /* Skip this many LRU walks */ u_int evict_walk_saved; /* Saved walk skips for checkpoints */ u_int evict_walk_skips; /* Number of walks skipped */ - int evict_disabled; /* Eviction disabled count */ + int32_t evict_disabled; /* Eviction disabled count */ volatile uint32_t evict_busy; /* Count of threads in eviction */ int evict_start_type; /* Start position for eviction walk (see WT_EVICT_WALK_START). */ -- cgit v1.2.1 From d61a94a99c87a9e94e4bd959e2269b32d1a7dd22 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Fri, 19 May 2017 01:47:34 -0400 Subject: WT-3327 Check for system clock ticking backwards (#3427) --- dist/stat_data.py | 1 + src/include/misc.i | 25 ++++ src/include/session.h | 1 + src/include/stat.h | 1 + src/include/wiredtiger.in | 294 +++++++++++++++++++++++----------------------- src/os_posix/os_time.c | 5 +- src/os_win/os_time.c | 3 +- src/support/stat.c | 3 + 8 files changed, 184 insertions(+), 149 deletions(-) diff --git a/dist/stat_data.py b/dist/stat_data.py index 9db0b3b0e80..4a147ca44eb 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -150,6 +150,7 @@ connection_stats = [ ConnStat('read_io', 'total read I/Os'), ConnStat('rwlock_read', 'pthread mutex shared lock read-lock calls'), ConnStat('rwlock_write', 'pthread mutex shared lock write-lock calls'), + ConnStat('time_travel', 'detected system time went backwards'), ConnStat('write_io', 'total write I/Os'), ########################################## diff --git 
a/src/include/misc.i b/src/include/misc.i index 78c6dc8a7dd..36a1e1f18eb 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -54,6 +54,31 @@ __wt_seconds(WT_SESSION_IMPL *session, time_t *timep) *timep = t.tv_sec; } +/* + * __wt_time_check_monotonic -- + * Check and prevent time running backward. If we detect that it has, we + * set the time structure to the previous values, making time stand still + * until we see a time in the future of the highest value seen so far. + */ +static inline void +__wt_time_check_monotonic(WT_SESSION_IMPL *session, struct timespec *tsp) +{ + /* + * Detect time going backward. If so, use the last + * saved timestamp. + */ + if (session == NULL) + return; + + if (tsp->tv_sec < session->last_epoch.tv_sec || + (tsp->tv_sec == session->last_epoch.tv_sec && + tsp->tv_nsec < session->last_epoch.tv_nsec)) { + WT_STAT_CONN_INCR(session, time_travel); + *tsp = session->last_epoch; + } else + session->last_epoch = *tsp; +} + /* * __wt_verbose -- * Verbose message. 
diff --git a/src/include/session.h b/src/include/session.h index de2c1463684..543063f5a90 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -66,6 +66,7 @@ struct __wt_session_impl { /* Session handle reference list */ TAILQ_HEAD(__dhandles, __wt_data_handle_cache) dhandles; time_t last_sweep; /* Last sweep for dead handles */ + struct timespec last_epoch; /* Last epoch time returned */ /* Cursors closed with the session */ TAILQ_HEAD(__cursors, __wt_cursor) cursors; diff --git a/src/include/stat.h b/src/include/stat.h index a537940d075..beb589dc0ef 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -361,6 +361,7 @@ struct __wt_connection_stats { int64_t cache_eviction_clean; int64_t cond_auto_wait_reset; int64_t cond_auto_wait; + int64_t time_travel; int64_t file_open; int64_t memory_allocation; int64_t memory_free; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 848448a453d..b93fbebef25 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -4588,316 +4588,318 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1102 /*! connection: auto adjusting condition wait calls */ #define WT_STAT_CONN_COND_AUTO_WAIT 1103 +/*! connection: detected system time went backwards */ +#define WT_STAT_CONN_TIME_TRAVEL 1104 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1104 +#define WT_STAT_CONN_FILE_OPEN 1105 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1105 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1106 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1106 +#define WT_STAT_CONN_MEMORY_FREE 1107 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1107 +#define WT_STAT_CONN_MEMORY_GROW 1108 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1108 +#define WT_STAT_CONN_COND_WAIT 1109 /*! 
connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1109 +#define WT_STAT_CONN_RWLOCK_READ 1110 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1110 +#define WT_STAT_CONN_RWLOCK_WRITE 1111 /*! connection: total fsync I/Os */ -#define WT_STAT_CONN_FSYNC_IO 1111 +#define WT_STAT_CONN_FSYNC_IO 1112 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1112 +#define WT_STAT_CONN_READ_IO 1113 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1113 +#define WT_STAT_CONN_WRITE_IO 1114 /*! cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1114 +#define WT_STAT_CONN_CURSOR_CREATE 1115 /*! cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1115 +#define WT_STAT_CONN_CURSOR_INSERT 1116 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1116 +#define WT_STAT_CONN_CURSOR_NEXT 1117 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1117 +#define WT_STAT_CONN_CURSOR_PREV 1118 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1118 +#define WT_STAT_CONN_CURSOR_REMOVE 1119 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1119 +#define WT_STAT_CONN_CURSOR_RESET 1120 /*! cursor: cursor restarted searches */ -#define WT_STAT_CONN_CURSOR_RESTART 1120 +#define WT_STAT_CONN_CURSOR_RESTART 1121 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1121 +#define WT_STAT_CONN_CURSOR_SEARCH 1122 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1122 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1123 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1123 +#define WT_STAT_CONN_CURSOR_UPDATE 1124 /*! cursor: truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1124 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1125 /*! 
data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1125 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1126 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1126 +#define WT_STAT_CONN_DH_SWEEP_REF 1127 /*! data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1127 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1128 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1128 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1129 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1129 +#define WT_STAT_CONN_DH_SWEEP_TOD 1130 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1130 +#define WT_STAT_CONN_DH_SWEEPS 1131 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1131 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1132 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1132 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1133 /*! lock: checkpoint lock acquisitions */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1133 +#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1134 /*! lock: checkpoint lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1134 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1135 /*! lock: checkpoint lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1135 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1136 /*! lock: handle-list lock eviction thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1136 +#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1137 /*! lock: metadata lock acquisitions */ -#define WT_STAT_CONN_LOCK_METADATA_COUNT 1137 +#define WT_STAT_CONN_LOCK_METADATA_COUNT 1138 /*! 
lock: metadata lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1138 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1139 /*! lock: metadata lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1139 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1140 /*! lock: schema lock acquisitions */ -#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1140 +#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1141 /*! lock: schema lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1141 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1142 /*! lock: schema lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1142 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1143 /*! lock: table lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_COUNT 1143 +#define WT_STAT_CONN_LOCK_TABLE_COUNT 1144 /*! * lock: table lock application thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1144 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1145 /*! * lock: table lock internal thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1145 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1146 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1146 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1147 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1147 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1148 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1148 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1149 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1149 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1150 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1150 +#define WT_STAT_CONN_LOG_FLUSH 1151 /*! 
log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1151 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1152 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1152 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1153 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1153 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1154 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1154 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1155 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1155 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1156 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1156 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1157 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1157 +#define WT_STAT_CONN_LOG_SCANS 1158 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1158 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1159 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1159 +#define WT_STAT_CONN_LOG_WRITE_LSN 1160 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1160 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1161 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1161 +#define WT_STAT_CONN_LOG_SYNC 1162 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1162 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1163 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1163 +#define WT_STAT_CONN_LOG_SYNC_DIR 1164 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1164 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1165 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1165 +#define WT_STAT_CONN_LOG_WRITES 1166 /*! 
log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1166 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1167 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1167 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1168 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1168 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1169 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1169 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1170 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1170 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1171 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1171 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1172 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1172 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1173 /*! log: slot close lost race */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1173 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1174 /*! log: slot close unbuffered waits */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1174 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1175 /*! log: slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1175 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1176 /*! log: slot join atomic update races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1176 +#define WT_STAT_CONN_LOG_SLOT_RACES 1177 /*! log: slot join calls atomic updates raced */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1177 +#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1178 /*! log: slot join calls did not yield */ -#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1178 +#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1179 /*! log: slot join calls found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1179 +#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1180 /*! 
log: slot join calls slept */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1180 +#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1181 /*! log: slot join calls yielded */ -#define WT_STAT_CONN_LOG_SLOT_YIELD 1181 +#define WT_STAT_CONN_LOG_SLOT_YIELD 1182 /*! log: slot join found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1182 +#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1183 /*! log: slot joins yield time (usecs) */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1183 +#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1184 /*! log: slot transitions unable to find free slot */ -#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1184 +#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1185 /*! log: slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1185 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1186 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1186 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1187 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1187 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1188 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1188 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1189 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1189 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1190 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1190 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1191 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1191 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1192 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1192 +#define WT_STAT_CONN_REC_PAGES 1193 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1193 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1194 /*! 
reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1194 +#define WT_STAT_CONN_REC_PAGE_DELETE 1195 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1195 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1196 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1196 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1197 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1197 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1198 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1198 +#define WT_STAT_CONN_SESSION_OPEN 1199 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1199 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1200 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1200 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1201 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1201 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1202 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1202 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1203 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1203 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1204 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1204 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1205 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1205 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1206 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1206 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1207 /*! 
session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1207 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1208 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1208 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1209 /*! session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1209 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1210 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1210 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1211 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1211 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1212 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1212 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1213 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1213 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1214 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1214 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1215 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1215 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1216 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1216 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1217 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1217 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1218 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1218 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1219 /*! 
thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1219 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1220 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1220 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1221 /*! thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1221 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1222 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1222 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1223 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1223 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1224 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1224 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1225 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1225 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1226 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1226 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1227 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1227 +#define WT_STAT_CONN_PAGE_SLEEP 1228 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1228 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1229 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1229 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1230 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1230 +#define WT_STAT_CONN_TXN_BEGIN 1231 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1231 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1232 /*! 
transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1232 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1233 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1233 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1234 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1234 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1235 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1235 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1236 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1236 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1237 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1237 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1238 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1238 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1239 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1239 +#define WT_STAT_CONN_TXN_CHECKPOINT 1240 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1240 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1241 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1241 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1242 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1242 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1243 /*! 
* transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1243 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1244 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1244 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1245 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1245 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1246 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1246 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1247 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1247 +#define WT_STAT_CONN_TXN_SYNC 1248 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1248 +#define WT_STAT_CONN_TXN_COMMIT 1249 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1249 +#define WT_STAT_CONN_TXN_ROLLBACK 1250 /*! 
* @} diff --git a/src/os_posix/os_time.c b/src/os_posix/os_time.c index 7a1d25427d0..ceb2c613551 100644 --- a/src/os_posix/os_time.c +++ b/src/os_posix/os_time.c @@ -29,8 +29,10 @@ __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) #if defined(HAVE_CLOCK_GETTIME) WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, tsp), ret); - if (ret == 0) + if (ret == 0) { + __wt_time_check_monotonic(session, tsp); return; + } WT_PANIC_MSG(session, ret, "clock_gettime"); #elif defined(HAVE_GETTIMEOFDAY) struct timeval v; @@ -39,6 +41,7 @@ __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) if (ret == 0) { tsp->tv_sec = v.tv_sec; tsp->tv_nsec = v.tv_usec * WT_THOUSAND; + __wt_time_check_monotonic(session, tsp); return; } WT_PANIC_MSG(session, ret, "gettimeofday"); diff --git a/src/os_win/os_time.c b/src/os_win/os_time.c index b06ab69c8e4..0818b7afcf0 100644 --- a/src/os_win/os_time.c +++ b/src/os_win/os_time.c @@ -18,14 +18,13 @@ __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) FILETIME time; uint64_t ns100; - WT_UNUSED(session); - GetSystemTimeAsFileTime(&time); ns100 = (((int64_t)time.dwHighDateTime << 32) + time.dwLowDateTime) - 116444736000000000LL; tsp->tv_sec = ns100 / 10000000; tsp->tv_nsec = (long)((ns100 % 10000000) * 100); + __wt_time_check_monotonic(session, tsp); } /* diff --git a/src/support/stat.c b/src/support/stat.c index 2b55ae37541..8711e6b9bc1 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -728,6 +728,7 @@ static const char * const __stats_connection_desc[] = { "cache: unmodified pages evicted", "connection: auto adjusting condition resets", "connection: auto adjusting condition wait calls", + "connection: detected system time went backwards", "connection: files currently open", "connection: memory allocations", "connection: memory frees", @@ -1020,6 +1021,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_clean = 0; stats->cond_auto_wait_reset = 0; stats->cond_auto_wait = 0; + 
stats->time_travel = 0; /* not clearing file_open */ stats->memory_allocation = 0; stats->memory_free = 0; @@ -1332,6 +1334,7 @@ __wt_stat_connection_aggregate( to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean); to->cond_auto_wait_reset += WT_STAT_READ(from, cond_auto_wait_reset); to->cond_auto_wait += WT_STAT_READ(from, cond_auto_wait); + to->time_travel += WT_STAT_READ(from, time_travel); to->file_open += WT_STAT_READ(from, file_open); to->memory_allocation += WT_STAT_READ(from, memory_allocation); to->memory_free += WT_STAT_READ(from, memory_free); -- cgit v1.2.1 From 9946ed24f2187bfc724edd8eb46069fcb94e090f Mon Sep 17 00:00:00 2001 From: sueloverso Date: Mon, 22 May 2017 12:47:10 -0400 Subject: WT-3264 Fix log file cleanup when toggling logging. (#3419) * WT-3264 Fix log file cleanup when toggling logging. * whitespace * Fix test loop bug * Remove debugging. Test fixes. Reset slot. * Update comments. Remove unused var. * Rename goto label --- src/conn/conn_log.c | 2 +- src/include/extern.h | 3 +- src/log/log.c | 55 +++++++++++++++++++ src/log/log_slot.c | 20 ++++--- src/txn/txn_recover.c | 25 ++++++++- test/suite/test_txn09.py | 4 +- test/suite/test_txn16.py | 140 +++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 234 insertions(+), 15 deletions(-) create mode 100644 test/suite/test_txn16.py diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index 08b572244af..dac16cc9d00 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -901,7 +901,7 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_RET(__wt_cond_alloc(session, "log sync", &log->log_sync_cond)); WT_RET(__wt_cond_alloc(session, "log write", &log->log_write_cond)); WT_RET(__wt_log_open(session)); - WT_RET(__wt_log_slot_init(session)); + WT_RET(__wt_log_slot_init(session, true)); return (0); } diff --git a/src/include/extern.h b/src/include/extern.h index 232ea6ff4e2..a3ce0f3746f 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ 
-368,6 +368,7 @@ extern int __wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, bo extern void __wt_log_written_reset(WT_SESSION_IMPL *session); extern int __wt_log_get_all_files(WT_SESSION_IMPL *session, char ***filesp, u_int *countp, uint32_t *maxid, bool active_only) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_log_extract_lognum( WT_SESSION_IMPL *session, const char *name, uint32_t *id) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_reset(WT_SESSION_IMPL *session, uint32_t lognum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_log_allocfile( WT_SESSION_IMPL *session, uint32_t lognum, const char *dest) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_log_remove(WT_SESSION_IMPL *session, const char *file_prefix, uint32_t lognum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -404,7 +405,7 @@ extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); extern int __wt_log_slot_switch(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_log_slot_init(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_log_slot_init(WT_SESSION_IMPL *session, bool alloc) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_log_slot_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, uint32_t flags, WT_MYSLOT *myslot); extern int64_t 
__wt_log_slot_release(WT_MYSLOT *myslot, int64_t size); diff --git a/src/log/log.c b/src/log/log.c index c8ba7366f3f..0de881660b2 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -8,6 +8,7 @@ #include "wt_internal.h" +static int __log_newfile(WT_SESSION_IMPL *, bool, bool *); static int __log_openfile( WT_SESSION_IMPL *, WT_FH **, const char *, uint32_t, uint32_t); static int __log_write_internal( @@ -437,6 +438,60 @@ __wt_log_extract_lognum( return (0); } +/* + * __wt_log_reset -- + * Reset the existing log file to after the given file number. + * Called from recovery when toggling logging back on, it was off + * the previous open but it was on earlier before that toggle. + */ +int +__wt_log_reset(WT_SESSION_IMPL *session, uint32_t lognum) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_LOG *log; + uint32_t old_lognum; + u_int i, logcount; + char **logfiles; + + conn = S2C(session); + log = conn->log; + + if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) || + log->fileid > lognum) + return (0); + + WT_ASSERT(session, F_ISSET(conn, WT_CONN_RECOVERING)); + WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY)); + /* + * We know we're single threaded and called from recovery only when + * toggling logging back on. Therefore the only log files we have are + * old and outdated and the new one created when logging opened before + * recovery. We have to remove all old log files first and then create + * the new one so that log file numbers are contiguous in the file + * system. + */ + WT_RET(__wt_close(session, &log->log_fh)); + WT_RET(__log_get_files(session, + WT_LOG_FILENAME, &logfiles, &logcount)); + for (i = 0; i < logcount; i++) { + WT_ERR(__wt_log_extract_lognum( + session, logfiles[i], &old_lognum)); + WT_ASSERT(session, old_lognum < lognum); + WT_ERR(__wt_log_remove( + session, WT_LOG_FILENAME, old_lognum)); + } + log->fileid = lognum; + + /* Send in true to update connection creation LSNs. 
*/ + WT_WITH_SLOT_LOCK(session, log, + ret = __log_newfile(session, true, NULL)); + WT_ERR(__wt_log_slot_init(session, false)); +err: WT_TRET( + __wt_fs_directory_list_free(session, &logfiles, logcount)); + return (ret); +} + /* * __log_zero -- * Zero a log file. diff --git a/src/log/log_slot.c b/src/log/log_slot.c index 444babfda92..5bd3d53a973 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -402,7 +402,7 @@ __wt_log_slot_switch(WT_SESSION_IMPL *session, * Initialize the slot array. */ int -__wt_log_slot_init(WT_SESSION_IMPL *session) +__wt_log_slot_init(WT_SESSION_IMPL *session, bool alloc) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -424,15 +424,17 @@ __wt_log_slot_init(WT_SESSION_IMPL *session) * switch log files very aggressively. Scale back the buffer for * small log file sizes. */ - log->slot_buf_size = (uint32_t)WT_MIN( - (size_t)conn->log_file_max / 10, WT_LOG_SLOT_BUF_SIZE); - for (i = 0; i < WT_SLOT_POOL; i++) { - WT_ERR(__wt_buf_init(session, - &log->slot_pool[i].slot_buf, log->slot_buf_size)); - F_SET(&log->slot_pool[i], WT_SLOT_INIT_FLAGS); + if (alloc) { + log->slot_buf_size = (uint32_t)WT_MIN( + (size_t)conn->log_file_max / 10, WT_LOG_SLOT_BUF_SIZE); + for (i = 0; i < WT_SLOT_POOL; i++) { + WT_ERR(__wt_buf_init(session, + &log->slot_pool[i].slot_buf, log->slot_buf_size)); + F_SET(&log->slot_pool[i], WT_SLOT_INIT_FLAGS); + } + WT_STAT_CONN_SET(session, + log_buffer_size, log->slot_buf_size * WT_SLOT_POOL); } - WT_STAT_CONN_SET(session, - log_buffer_size, log->slot_buf_size * WT_SLOT_POOL); /* * Set up the available slot from the pool the first time. */ diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c index 97e8e98f8e0..590e17b6a2a 100644 --- a/src/txn/txn_recover.c +++ b/src/txn/txn_recover.c @@ -20,6 +20,7 @@ typedef struct { } *files; size_t file_alloc; /* Allocated size of files array. */ u_int max_fileid; /* Maximum file ID seen. */ + WT_LSN max_lsn; /* Maximum checkpoint LSN seen. 
*/ u_int nfiles; /* Number of files in the metadata. */ WT_LSN ckpt_lsn; /* Start LSN for main recovery loop. */ @@ -342,6 +343,10 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config) "Recovering %s with id %" PRIu32 " @ (%" PRIu32 ", %" PRIu32 ")", uri, fileid, lsn.l.file, lsn.l.offset); + if ((!WT_IS_MAX_LSN(&lsn) && !WT_IS_INIT_LSN(&lsn)) && + (WT_IS_MAX_LSN(&r->max_lsn) || __wt_log_cmp(&lsn, &r->max_lsn) > 0)) + r->max_lsn = lsn; + return (0); } @@ -428,6 +433,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session) WT_RET(__wt_open_internal_session(conn, "txn-recover", false, WT_SESSION_NO_LOGGING, &session)); r.session = session; + WT_MAX_LSN(&r.max_lsn); F_SET(conn, WT_CONN_RECOVERING); WT_ERR(__wt_metadata_search(session, WT_METAFILE_URI, &config)); @@ -443,9 +449,24 @@ __wt_txn_recover(WT_SESSION_IMPL *session) */ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_EXISTED) || WT_IS_MAX_LSN(&metafile->ckpt_lsn)) { + /* + * Detect if we're going from logging disabled to enabled. + * We need to know this to verify LSNs and start at the correct + * log file later. If someone ran with logging, then disabled + * it and removed all the log files and then turned logging back + * on, we have to start logging in the log file number that is + * larger than any checkpoint LSN we have from the earlier time. + */ WT_ERR(__recovery_file_scan(&r)); conn->next_file_id = r.max_fileid; - goto done; + + if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) && + WT_IS_MAX_LSN(&metafile->ckpt_lsn) && + !WT_IS_MAX_LSN(&r.max_lsn)) { + WT_ERR(__wt_log_reset(session, r.max_lsn.l.file)); + goto ckpt; + } else + goto done; } /* @@ -556,7 +577,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session) * open is fast and keep the metadata up to date with the checkpoint * LSN and archiving. 
*/ - WT_ERR(session->iface.checkpoint(&session->iface, "force=1")); +ckpt: WT_ERR(session->iface.checkpoint(&session->iface, "force=1")); done: FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_DONE); err: WT_TRET(__recovery_free(&r)); diff --git a/test/suite/test_txn09.py b/test/suite/test_txn09.py index e0710803f36..cc5771ef681 100644 --- a/test/suite/test_txn09.py +++ b/test/suite/test_txn09.py @@ -26,8 +26,8 @@ # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. # -# test_txn02.py -# Transactions: commits and rollbacks +# test_txn09.py +# Transactions: recovery toggling logging # import fnmatch, os, shutil, time diff --git a/test/suite/test_txn16.py b/test/suite/test_txn16.py new file mode 100644 index 00000000000..929da2291c7 --- /dev/null +++ b/test/suite/test_txn16.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# test_txn16.py +# Recovery: Test that toggling between logging and not logging does not +# continue to generate more log files. +# + +import fnmatch, os, shutil, time +from suite_subprocess import suite_subprocess +import wttest + +class test_txn16(wttest.WiredTigerTestCase, suite_subprocess): + t1 = 'table:test_txn16_1' + t2 = 'table:test_txn16_2' + t3 = 'table:test_txn16_3' + nentries = 1000 + create_params = 'key_format=i,value_format=i' + # Set the log file size small so we generate checkpoints + # with LSNs in different files. + conn_config = 'config_base=false,' + \ + 'log=(archive=false,enabled,file_max=100K),' + \ + 'transaction_sync=(method=dsync,enabled)' + conn_on = 'config_base=false,' + \ + 'log=(archive=false,enabled,file_max=100K),' + \ + 'transaction_sync=(method=dsync,enabled)' + conn_off = 'config_base=false,log=(enabled=false)' + + def populate_table(self, uri): + self.session.create(uri, self.create_params) + c = self.session.open_cursor(uri, None, None) + # Populate with an occasional checkpoint to generate + # some varying LSNs. + for i in range(self.nentries): + c[i] = i + 1 + if i % 900 == 0: + self.session.checkpoint() + c.close() + + def copy_dir(self, olddir, newdir): + ''' Simulate a crash from olddir and restart in newdir. ''' + # with the connection still open, copy files to new directory + shutil.rmtree(newdir, ignore_errors=True) + os.mkdir(newdir) + for fname in os.listdir(olddir): + fullname = os.path.join(olddir, fname) + # Skip lock file on Windows since it is locked + if os.path.isfile(fullname) and \ + "WiredTiger.lock" not in fullname and \ + "Tmplog" not in fullname and \ + "Preplog" not in fullname: + shutil.copy(fullname, newdir) + # close the original connection. 
+ self.close_conn() + + def run_toggle(self, homedir): + loop = 0 + # Record original log files. There should never be overlap + # with these even after they're removed. + orig_logs = fnmatch.filter(os.listdir(homedir), "*Log*") + while loop < 3: + # Reopen with logging on to run recovery first time + on_conn = self.wiredtiger_open(homedir, self.conn_on) + on_conn.close() + if loop > 0: + # Get current log files. + cur_logs = fnmatch.filter(os.listdir(homedir), "*Log*") + scur = set(cur_logs) + sorig = set(orig_logs) + # There should never be overlap with the log files that + # were there originally. Mostly this checks that after + # opening with logging disabled and then re-enabled, we + # don't see log file 1. + self.assertEqual(scur.isdisjoint(sorig), True) + if loop > 1: + # We should be creating the same log files each time. + for l in cur_logs: + self.assertEqual(l in last_logs, True) + for l in last_logs: + self.assertEqual(l in cur_logs, True) + last_logs = cur_logs + loop += 1 + # Remove all log files before opening without logging. + cur_logs = fnmatch.filter(os.listdir(homedir), "*Log*") + for l in cur_logs: + path=homedir + "/" + l + os.remove(path) + off_conn = self.wiredtiger_open(homedir, self.conn_off) + off_conn.close() + + def test_recovery(self): + ''' Check log file creation when toggling. ''' + + # Here's the strategy: + # - With logging populate 4 tables. Checkpoint + # them at different times. + # - Copy to a new directory to simulate a crash. + # - Close the original connection. + # On both a "copy" to simulate a crash and the original (3x): + # - Record log files existing. + # - Reopen with logging to run recovery. Close connection. + # - Record log files existing. + # - Remove all log files. + # - Open connection with logging disabled. + # - Record log files existing. Verify we don't keep adding. 
+ # + self.populate_table(self.t1) + self.populate_table(self.t2) + self.populate_table(self.t3) + self.copy_dir(".", "RESTART") + self.run_toggle(".") + self.run_toggle("RESTART") + +if __name__ == '__main__': + wttest.run() -- cgit v1.2.1 From 2c6aec8e9ff854d745a39d92ca2e4b9566822437 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Tue, 23 May 2017 14:03:20 -0400 Subject: WT-3331 Get time into a local variable so we can read and use a consistent time (#3430) --- src/os_posix/os_time.c | 21 ++++++++++++++++----- src/os_win/os_time.c | 8 +++++--- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/os_posix/os_time.c b/src/os_posix/os_time.c index ceb2c613551..cc9516468aa 100644 --- a/src/os_posix/os_time.c +++ b/src/os_posix/os_time.c @@ -16,6 +16,7 @@ void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) { + struct timespec tmp; WT_DECL_RET; /* @@ -27,24 +28,34 @@ __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) tsp->tv_sec = 0; tsp->tv_nsec = 0; + /* + * Read into a local variable so that we're comparing the correct + * value when we check for monotonic increasing time. There are + * many places we read into an unlocked global variable. 
+ */ #if defined(HAVE_CLOCK_GETTIME) - WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, tsp), ret); + WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, &tmp), ret); if (ret == 0) { - __wt_time_check_monotonic(session, tsp); + __wt_time_check_monotonic(session, &tmp); + tsp->tv_sec = tmp.tv_sec; + tsp->tv_nsec = tmp.tv_nsec; return; } WT_PANIC_MSG(session, ret, "clock_gettime"); #elif defined(HAVE_GETTIMEOFDAY) + { struct timeval v; WT_SYSCALL_RETRY(gettimeofday(&v, NULL), ret); if (ret == 0) { - tsp->tv_sec = v.tv_sec; - tsp->tv_nsec = v.tv_usec * WT_THOUSAND; - __wt_time_check_monotonic(session, tsp); + tmp.tv_sec = v.tv_sec; + tmp.tv_nsec = v.tv_usec * WT_THOUSAND; + __wt_time_check_monotonic(session, &tmp); + *tsp = tmp; return; } WT_PANIC_MSG(session, ret, "gettimeofday"); + } #else NO TIME-OF-DAY IMPLEMENTATION: see src/os_posix/os_time.c #endif diff --git a/src/os_win/os_time.c b/src/os_win/os_time.c index 0818b7afcf0..038c1d78d21 100644 --- a/src/os_win/os_time.c +++ b/src/os_win/os_time.c @@ -15,6 +15,7 @@ void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) { + struct timespec tmp; FILETIME time; uint64_t ns100; @@ -22,9 +23,10 @@ __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) ns100 = (((int64_t)time.dwHighDateTime << 32) + time.dwLowDateTime) - 116444736000000000LL; - tsp->tv_sec = ns100 / 10000000; - tsp->tv_nsec = (long)((ns100 % 10000000) * 100); - __wt_time_check_monotonic(session, tsp); + tmp.tv_sec = ns100 / 10000000; + tmp.tv_nsec = (long)((ns100 % 10000000) * 100); + __wt_time_check_monotonic(session, &tmp); + *tsp = tmp; } /* -- cgit v1.2.1 From 8d4fe59dede876615fbd4bdff200db58173ff71b Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 24 May 2017 10:07:45 -0400 Subject: WT-3334 fix python test suite's BaseDataSet class for 'u' format (#3431) --- test/suite/wtdataset.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/test/suite/wtdataset.py b/test/suite/wtdataset.py index 
88f73021639..3093f550e8b 100644 --- a/test/suite/wtdataset.py +++ b/test/suite/wtdataset.py @@ -71,9 +71,9 @@ class BaseDataSet(object): # Create a key for a Simple or Complex data set. @staticmethod def key_by_format(i, key_format): - if key_format == 'i' or key_format == 'r' or key_format == 'u': + if key_format == 'i' or key_format == 'r': return i - elif key_format == 'S': + elif key_format == 'S' or key_format == 'u': return str('%015d' % i) else: raise AssertionError( @@ -82,9 +82,9 @@ class BaseDataSet(object): # Create a value for a Simple data set. @staticmethod def value_by_format(i, value_format): - if value_format == 'i' or value_format == 'r' or value_format == 'u': + if value_format == 'i' or value_format == 'r': return i - elif value_format == 'S': + elif value_format == 'S' or value_format == 'u': return str(i) + ': abcdefghijklmnopqrstuvwxyz' elif value_format == '8t': value = ( @@ -94,8 +94,7 @@ class BaseDataSet(object): return value[i % len(value)] else: raise AssertionError( - 'value: object has unexpected format: ' - + value_format) + 'value: object has unexpected format: ' + value_format) # Create a key for this data set. Simple and Complex data sets have # the same key space. 
-- cgit v1.2.1 From a3073be71b1406c7eef60e35fcf2cdc1174e752b Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 24 May 2017 14:55:44 -0400 Subject: WT-2972 Add interface allowing partial updates to existing values (#3406) Document and implement a WT_CURSOR.modify API --- dist/s_define.list | 2 +- dist/s_string.ok | 3 + dist/stat_data.py | 6 +- examples/c/ex_all.c | 33 ++ .../java/com/wiredtiger/examples/ex_cursor.java | 41 +++ lang/java/Makefile.am | 2 + lang/java/java_doc.i | 1 + lang/java/wiredtiger.i | 233 ++++++++++++ lang/python/wiredtiger.i | 119 +++++- src/btree/bt_curnext.c | 8 +- src/btree/bt_curprev.c | 8 +- src/btree/bt_cursor.c | 87 +++-- src/btree/bt_debug.c | 4 +- src/btree/bt_delete.c | 2 +- src/btree/bt_read.c | 16 +- src/btree/bt_split.c | 6 +- src/btree/bt_stat.c | 20 +- src/btree/col_modify.c | 17 +- src/btree/row_modify.c | 20 +- src/cursor/cur_backup.c | 1 + src/cursor/cur_config.c | 1 + src/cursor/cur_ds.c | 1 + src/cursor/cur_dump.c | 1 + src/cursor/cur_file.c | 6 + src/cursor/cur_index.c | 1 + src/cursor/cur_join.c | 2 + src/cursor/cur_log.c | 1 + src/cursor/cur_metadata.c | 1 + src/cursor/cur_stat.c | 1 + src/cursor/cur_std.c | 115 ++++++ src/cursor/cur_table.c | 2 + src/docs/Doxyfile | 1 + src/include/btmem.h | 37 +- src/include/cursor.h | 2 + src/include/extern.h | 7 +- src/include/log.h | 2 +- src/include/lsm.h | 4 +- src/include/mutex.h | 4 +- src/include/mutex.i | 8 +- src/include/schema.h | 2 +- src/include/stat.h | 4 + src/include/txn.i | 2 +- src/include/verify_build.h | 1 + src/include/wiredtiger.in | 406 ++++++++++++--------- src/lsm/lsm_cursor.c | 1 + src/reconcile/rec_write.c | 23 +- src/support/stat.c | 14 + src/txn/txn.c | 2 +- src/txn/txn_log.c | 6 +- test/csuite/scope/main.c | 102 +++++- test/format/config.c | 17 +- test/format/config.h | 6 +- test/format/format.h | 27 +- test/format/ops.c | 278 +++++++++++++- test/suite/test_cursor12.py | 165 +++++++++ 55 files changed, 1527 insertions(+), 355 deletions(-) create mode 
100644 test/suite/test_cursor12.py diff --git a/dist/s_define.list b/dist/s_define.list index b7f124ef18c..9f94132f584 100644 --- a/dist/s_define.list +++ b/dist/s_define.list @@ -58,7 +58,7 @@ WT_STAT_INCRV_BASE WT_STAT_WRITE WT_TIMEDIFF_US WT_TRET_ERROR_OK -WT_UPDATE_RESERVED_VALUE +WT_UPDATE_SIZE WT_WITH_LOCK_NOWAIT WT_WITH_LOCK_WAIT __F diff --git a/dist/s_string.ok b/dist/s_string.ok index ac21c61a8ef..d5a562fcbd1 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -344,6 +344,7 @@ Split's Stoica StoreLoad StoreStore +Su Syscall TAILQ TCMalloc @@ -949,6 +950,7 @@ nd needkey needvalue negint +nentries newbar newfile newuri @@ -1092,6 +1094,7 @@ rotN rotn rp rpc +ru run's runtime rwlock diff --git a/dist/stat_data.py b/dist/stat_data.py index 4a147ca44eb..203a88fb055 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -231,8 +231,8 @@ connection_stats = [ CacheStat('cache_eviction_walks_abandoned', 'eviction walks abandoned'), CacheStat('cache_eviction_walks_active', 'files with active eviction walks', 'no_clear,no_scale'), CacheStat('cache_eviction_walks_started', 'files with new eviction walks started'), - CacheStat('cache_eviction_worker_evicting', 'eviction worker thread evicting pages'), CacheStat('cache_eviction_worker_created', 'eviction worker thread created'), + CacheStat('cache_eviction_worker_evicting', 'eviction worker thread evicting pages'), CacheStat('cache_eviction_worker_removed', 'eviction worker thread removed'), CacheStat('cache_hazard_checks', 'hazard pointer check calls'), CacheStat('cache_hazard_max', 'hazard pointer maximum array length', 'max_aggregate,no_scale'), @@ -262,9 +262,11 @@ connection_stats = [ ########################################## CursorStat('cursor_create', 'cursor create calls'), CursorStat('cursor_insert', 'cursor insert calls'), + CursorStat('cursor_modify', 'cursor modify calls'), CursorStat('cursor_next', 'cursor next calls'), CursorStat('cursor_prev', 'cursor prev calls'), CursorStat('cursor_remove', 
'cursor remove calls'), + CursorStat('cursor_reserve', 'cursor reserve calls'), CursorStat('cursor_reset', 'cursor reset calls'), CursorStat('cursor_restart', 'cursor restarted searches'), CursorStat('cursor_search', 'cursor search calls'), @@ -553,10 +555,12 @@ dsrc_stats = [ CursorStat('cursor_insert', 'insert calls'), CursorStat('cursor_insert_bulk', 'bulk-loaded cursor-insert calls'), CursorStat('cursor_insert_bytes', 'cursor-insert key and value bytes inserted', 'size'), + CursorStat('cursor_modify', 'modify calls'), CursorStat('cursor_next', 'next calls'), CursorStat('cursor_prev', 'prev calls'), CursorStat('cursor_remove', 'remove calls'), CursorStat('cursor_remove_bytes', 'cursor-remove key bytes removed', 'size'), + CursorStat('cursor_reserve', 'reserve calls'), CursorStat('cursor_reset', 'reset calls'), CursorStat('cursor_restart', 'restarted searches'), CursorStat('cursor_search', 'search calls'), diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c index f94863584e8..5e1fa4bbcc5 100644 --- a/examples/c/ex_all.c +++ b/examples/c/ex_all.c @@ -308,6 +308,39 @@ cursor_ops(WT_SESSION *session) /*! [Reserve a record] */ } + { + /*! [Modify an existing record] */ + WT_MODIFY entries[3]; + const char *key = "some key"; + ret = session->open_cursor( + session, "table:mytable", NULL, NULL, &cursor); + + /* Position the cursor. */ + cursor->set_key(cursor, key); + ret = cursor->search(cursor); + + /* Replace 20 bytes starting at byte offset 5. */ + entries[0].data.data = "some data"; + entries[0].data.size = strlen(entries[0].data.data); + entries[0].offset = 5; + entries[0].size = 20; + + /* Insert data at byte offset 40. */ + entries[1].data.data = "and more data"; + entries[1].data.size = strlen(entries[1].data.data); + entries[1].offset = 40; + entries[1].size = 0; + + /* Replace 2 bytes starting at byte offset 10. 
*/ + entries[2].data.data = "and more data"; + entries[2].data.size = strlen(entries[2].data.data); + entries[2].offset = 10; + entries[2].size = 2; + + ret = cursor->modify(cursor, entries, 3); + /*! [Modify an existing record] */ + } + { /*! [Update an existing record or insert a new record] */ const char *key = "some key", *value = "some value"; diff --git a/examples/java/com/wiredtiger/examples/ex_cursor.java b/examples/java/com/wiredtiger/examples/ex_cursor.java index 498ace12865..4a57f3c35da 100644 --- a/examples/java/com/wiredtiger/examples/ex_cursor.java +++ b/examples/java/com/wiredtiger/examples/ex_cursor.java @@ -156,6 +156,41 @@ public class ex_cursor { } /*! [cursor remove] */ + /*! [cursor modify] */ + public static int + cursor_modify(Cursor cursor) + throws WiredTigerException + { + byte orig[] = new byte[4]; + for (int i = 0; i < 4; i++) + orig[i] = (byte)i; + cursor.putKeyString("key"); + cursor.putValueByteArray(orig); + cursor.insert(); // 0x0 0x1 0x2 0x3 + + byte b10[] = new byte[4]; + for (int i = 0; i < 4; i++) + b10[i] = (byte)(0x10 + i); + byte b20[] = new byte[4]; + for (int i = 0; i < 4; i++) + b20[i] = (byte)(0x20 + i); + + Modify modlist[] = new Modify[2]; + // The following Modify replaces one byte at position one by: + // (0x10 0x11 0x12 0x13), leaving: + // 0x0 0x10 0x11 0x12 0x13 0x2 0x3 + modlist[0] = new Modify(b10, 1, 1); + + // The following Modify replaces one byte at position three by: + // (0x20 0x21 0x22 0x23), leaving: + // 0x0 0x10 0x11 0x20 0x21 0x22 0x23 0x13 0x2 0x3 + modlist[1] = new Modify(b20, 3, 1); + + cursor.putKeyString("key"); + return (cursor.modify(modlist)); + } + /*! [cursor modify] */ + public static int cursorExample() throws WiredTigerException @@ -219,6 +254,12 @@ public class ex_cursor { ret = cursor_remove(cursor); ret = cursor.close(); + /* Create a table with a raw value to illustrate certain operations. 
*/ + ret = session.create("table:raw", "key_format=S,value_format=u"); + cursor = session.open_cursor("table:raw", null, null); + ret = cursor_modify(cursor); + ret = cursor.close(); + /* Note: closing the connection implicitly closes open session(s). */ if ((ret = conn.close(null)) != 0) System.err.println("Error connecting to " + home + ": " + diff --git a/lang/java/Makefile.am b/lang/java/Makefile.am index 2ff822a5d08..71515c430fd 100644 --- a/lang/java/Makefile.am +++ b/lang/java/Makefile.am @@ -18,6 +18,7 @@ JAVA_SRC = \ $(JAVADESTFULL)/AsyncOpType.java \ $(JAVADESTFULL)/Connection.java \ $(JAVADESTFULL)/Cursor.java \ + $(JAVADESTFULL)/Modify.java \ $(JAVADESTFULL)/SearchStatus.java \ $(JAVADESTFULL)/PackFormatInputStream.java \ $(JAVADESTFULL)/PackInputStream.java \ @@ -31,6 +32,7 @@ JAVA_SRC = \ $(JAVADESTFULL)/wiredtiger.java \ $(JAVADESTFULL)/wiredtigerConstants.java \ $(JAVADESTFULL)/wiredtigerJNI.java \ + $(JAVADESTFULL)/WT_MODIFY_LIST.java \ $(JAVAEXAMPLES)/ex_access.java \ $(JAVAEXAMPLES)/ex_all.java \ $(JAVAEXAMPLES)/ex_async.java \ diff --git a/lang/java/java_doc.i b/lang/java/java_doc.i index 8088abbf065..f9e017ee43a 100644 --- a/lang/java/java_doc.i +++ b/lang/java/java_doc.i @@ -12,6 +12,7 @@ COPYDOC(__wt_cursor, WT_CURSOR, reset) COPYDOC(__wt_cursor, WT_CURSOR, search) COPYDOC(__wt_cursor, WT_CURSOR, search_near) COPYDOC(__wt_cursor, WT_CURSOR, insert) +COPYDOC(__wt_cursor, WT_CURSOR, modify) COPYDOC(__wt_cursor, WT_CURSOR, update) COPYDOC(__wt_cursor, WT_CURSOR, remove) COPYDOC(__wt_cursor, WT_CURSOR, reserve) diff --git a/lang/java/wiredtiger.i b/lang/java/wiredtiger.i index d6fc5fc8b9a..4c22a0af43b 100644 --- a/lang/java/wiredtiger.i +++ b/lang/java/wiredtiger.i @@ -47,6 +47,7 @@ %} %{ +#include "wiredtiger.h" #include "src/include/wt_internal.h" /* @@ -108,6 +109,23 @@ static void throwWiredTigerException(JNIEnv *jenv, int err) { (*jenv)->ThrowNew(jenv, excep, wiredtiger_strerror(err)); } +struct __wt_java_modify_impl; +struct 
__wt_java_modify_list; +typedef struct __wt_java_modify_impl WT_MODIFY_IMPL; +typedef struct __wt_java_modify_list WT_MODIFY_LIST; +static void modify_impl_release(WT_MODIFY_IMPL *impl); +static void modify_list_release(WT_MODIFY_LIST *impl); + +/* + * An extension to the WT_MODIFY struct, so we can associate some Java-specific + * information with it. + */ +typedef struct __wt_java_modify_impl { + WT_MODIFY modify; + JNIEnv *jnienv; + jobject ref; +} WT_MODIFY_IMPL; + %} /* No finalizers */ @@ -159,6 +177,32 @@ static void throwWiredTigerException(JNIEnv *jenv, int err) { } %} +/* + * In some cases, for an internal interface, we need something like a WT_ITEM, + * but we need to hold onto the memory past the method call, and release it + * later. A WT_ITEM_HOLD serves the purpose, it retains the java object + * for the byte array that we make into a global reference. + */ +%typemap(jni) WT_ITEM_HOLD, WT_ITEM_HOLD * "jbyteArray" +%typemap(jtype) WT_ITEM_HOLD, WT_ITEM_HOLD * "byte[]" +%typemap(jstype) WT_ITEM_HOLD, WT_ITEM_HOLD * "byte[]" + +%typemap(javain) WT_ITEM_HOLD, WT_ITEM_HOLD * "$javainput" +%typemap(javaout) WT_ITEM_HOLD, WT_ITEM_HOLD * { + return ($jnicall); +} +%typemap(in) WT_ITEM_HOLD * (WT_ITEM_HOLD item) %{ + $1 = &item; + $1->data = (*jenv)->GetByteArrayElements(jenv, $input, 0); + $1->size = (size_t)(*jenv)->GetArrayLength(jenv, $input); + $1->jnienv = jenv; + $1->ref = (*jenv)->NewGlobalRef(jenv, $input); +%} + +%typemap(argout) WT_ITEM_HOLD * %{ + /* Explicitly don't release the byte array elements here. */ +%} + /* Don't require empty config strings. 
*/ %typemap(default) const char *config %{ $1 = NULL; %} @@ -309,6 +353,10 @@ WT_CLASS(struct __wt_async_op, WT_ASYNC_OP, op) %rename (prev_wrap) __wt_cursor::prev; %javamethodmodifiers __wt_cursor::key_format "protected"; %javamethodmodifiers __wt_cursor::value_format "protected"; +%ignore __wt_modify::data; +%ignore __wt_modify::position; +%ignore __wt_modify::size; +%ignore __wt_cursor::modify; %ignore __wt_cursor::compare(WT_CURSOR *, WT_CURSOR *, int *); %rename (compare_wrap) __wt_cursor::compare; @@ -1224,6 +1272,47 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; JCALL1(DeleteLocalRef, jcb->jnienv, jcursor); return (0); } + + int modify_wrap(WT_MODIFY_LIST *list, WT_ITEM *k) { + int ret; + + $self->set_key($self, k); + ret = $self->modify(self, list->mod_array, list->count); + modify_list_release(list); + return (ret); + } + + /* + * Called internally after a new call. The artificial constructor for + * WT_MODIFY_LIST has no opportunity to throw an exception on a memory + * allocation failure, so the the null check must be made within a + * method on WT_CURSOR. + */ + bool _new_check_modify_list(WT_MODIFY_LIST *list) { + JAVA_CALLBACK *jcb; + if (list == NULL) { + jcb = (JAVA_CALLBACK *)$self->lang_private; + throwWiredTigerException(jcb->jnienv, ENOMEM); + return (false); + } + return (true); + } + + /* + * Called internally after a new call. The artificial constructor for + * WT_MODIFY has no opportunity to throw an exception on a memory + * allocation failure, so the the null check must be made within a + * method on WT_CURSOR. 
+ */ + bool _new_check_modify(WT_MODIFY *mod) { + JAVA_CALLBACK *jcb; + if (mod == NULL) { + jcb = (JAVA_CALLBACK *)$self->lang_private; + throwWiredTigerException(jcb->jnienv, ENOMEM); + return (false); + } + return (true); + } } /* Cache key/value formats in Cursor */ @@ -1820,6 +1909,149 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; return new PackInputStream(valueFormat, get_value_wrap(), _java_raw()); } + + /** + * Modify an existing record. + * + * The cursor must already be positioned, and the key's value will be + * updated. + * + * \param mods an array of modifications. + * \return 0 on success, errno on error. + */ + public int modify(Modify mods[]) + throws WiredTigerException { + byte[] key = keyPacker.getValue(); + keyPacker.reset(); + + WT_MODIFY_LIST l = new WT_MODIFY_LIST(mods.length); + if (!_new_check_modify_list(l)) + return (0); // exception is already thrown + int pos = 0; + + for (Modify m : mods) { + if (!_new_check_modify(m)) + return (0); // exception is already thrown + l.set(pos, m); + pos++; + } + return modify_wrap(l, key); + } +%} + +/* + * Support for WT_CURSOR.modify. + */ + +%inline %{ +typedef struct __wt_java_item_hold { +#ifndef SWIG + void *data; + size_t size; + JNIEnv *jnienv; + jobject ref; +#endif +} WT_ITEM_HOLD; + +/* + * An internal Java class encapsulates a list of Modify objects (stored as a + * WT_MODIFY array in C). 
+ */ +typedef struct __wt_java_modify_list { +#ifndef SWIG + WT_MODIFY *mod_array; + jobject *ref_array; + JNIEnv *jnienv; + int count; +#endif +} WT_MODIFY_LIST; +%} +%extend __wt_java_modify_list { + __wt_java_modify_list(int count) { + WT_MODIFY_LIST *self; + if (__wt_calloc_def(NULL, 1, &self) != 0) + return (NULL); + if (__wt_calloc_def(NULL, (size_t)count, + &self->mod_array) != 0) { + __wt_free(NULL, self); + return (NULL); + } + if (__wt_calloc_def(NULL, (size_t)count, + &self->ref_array) != 0) { + __wt_free(NULL, self->mod_array); + __wt_free(NULL, self); + return (NULL); + } + self->count = count; + return (self); + } + ~__wt_java_modify_list() { + modify_list_release(self); + __wt_free(NULL, self); + } + void set(int i, WT_MODIFY *m) { + WT_MODIFY_IMPL *impl = (WT_MODIFY_IMPL *)m; + self->mod_array[i] = *m; + self->ref_array[i] = impl->ref; + impl->ref = (jobject)0; + self->jnienv = impl->jnienv; + } +}; + +%extend __wt_modify { + __wt_modify() { + WT_MODIFY_IMPL *self; + if (__wt_calloc_def(NULL, 1, &self) != 0) + return (NULL); + self->modify.data.data = NULL; + self->modify.data.size = 0; + self->modify.offset = 0; + self->modify.size = 0; + return (&self->modify); + } + __wt_modify(WT_ITEM_HOLD *itemdata, + size_t offset, size_t size) { + WT_MODIFY_IMPL *self; + if (__wt_calloc_def(NULL, 1, &self) != 0) + return (NULL); + self->modify.data.data = itemdata->data; + self->modify.data.size = itemdata->size; + self->modify.offset = offset; + self->modify.size = size; + self->ref = itemdata->ref; + self->jnienv = itemdata->jnienv; + return (&self->modify); + } + ~__wt_modify() { + modify_impl_release((WT_MODIFY_IMPL *)self); + __wt_free(NULL, self); + } +}; + +%{ +static void modify_list_release(WT_MODIFY_LIST *list) { + for (int i = 0; i < list->count; i++) + if (list->ref_array[i] != (jobject)0) { + (*list->jnienv)->ReleaseByteArrayElements( + list->jnienv, list->ref_array[i], + (jbyte *)list->mod_array[i].data.data, 0); + 
(*list->jnienv)->DeleteGlobalRef( + list->jnienv, list->ref_array[i]); + } + __wt_free(NULL, list->ref_array); + __wt_free(NULL, list->mod_array); + list->count = 0; +} + +static void modify_impl_release(WT_MODIFY_IMPL *impl) { + if (impl->ref != (jobject)0) { + (*impl->jnienv)->ReleaseByteArrayElements( + impl->jnienv, impl->ref, + (jbyte *)impl->modify.data.data, 0); + (*impl->jnienv)->DeleteGlobalRef(impl->jnienv, impl->ref); + impl->ref = (jobject)0; + } +} %} /* Put a WiredTigerException on all wrapped methods. We'd like this @@ -1902,6 +2134,7 @@ REQUIRE_WRAP(WT_ASYNC_OP::get_id, __wt_async_op::get_id,getId) %rename(AsyncOp) __wt_async_op; %rename(Cursor) __wt_cursor; +%rename(Modify) __wt_modify; %rename(Session) __wt_session; %rename(Connection) __wt_connection; diff --git a/lang/python/wiredtiger.i b/lang/python/wiredtiger.i index 7a297312bb8..61c7fc62c43 100644 --- a/lang/python/wiredtiger.i +++ b/lang/python/wiredtiger.i @@ -151,6 +151,74 @@ from packing import pack, unpack } } +%typemap(in) WT_MODIFY * (int len, WT_MODIFY *modarray, int i) { + len = PyList_Size($input); + /* + * We allocate an extra cleared WT_MODIFY struct, the first + * entry will be used solely to transmit the array length to + * the call site. 
+ */ + if (__wt_calloc_def(NULL, (size_t)len + 1, &modarray) != 0) + SWIG_exception_fail(SWIG_MemoryError, "WT calloc failed"); + modarray[0].size = (size_t)len; + for (i = 1; i <= len; i++) { + PyObject *dataobj, *modobj, *offsetobj, *sizeobj; + char *datadata; + long offset, size; + Py_ssize_t datasize; + + if ((modobj = PySequence_GetItem($input, i - 1)) == NULL) + SWIG_exception_fail(SWIG_IndexError, + "Modify sequence failed"); + + WT_GETATTR(dataobj, modobj, "data"); + if (PyString_AsStringAndSize(dataobj, &datadata, + &datasize) < 0) { + Py_DECREF(dataobj); + Py_DECREF(modobj); + SWIG_exception_fail(SWIG_AttributeError, + "Modify.data bad value"); + } + modarray[i].data.data = malloc(datasize); + memcpy(modarray[i].data.data, datadata, datasize); + modarray[i].data.size = datasize; + Py_DECREF(dataobj); + + WT_GETATTR(offsetobj, modobj, "offset"); + if ((offset = PyInt_AsLong(offsetobj)) < 0) { + Py_DECREF(offsetobj); + Py_DECREF(modobj); + SWIG_exception_fail(SWIG_RuntimeError, + "Modify.offset bad value"); + } + modarray[i].offset = offset; + Py_DECREF(offsetobj); + + WT_GETATTR(sizeobj, modobj, "size"); + if ((size = PyInt_AsLong(sizeobj)) < 0) { + Py_DECREF(sizeobj); + Py_DECREF(modobj); + SWIG_exception_fail(SWIG_RuntimeError, + "Modify.size bad value"); + } + modarray[i].size = size; + Py_DECREF(sizeobj); + Py_DECREF(modobj); + } + $1 = modarray; +} + +%typemap(freearg) WT_MODIFY * { + /* The WT_MODIFY arg is in position 2. Is there a better way? */ + WT_MODIFY *modarray = modarray2; + size_t i, len; + + len = modarray[0].size; + for (i = 1; i <= len; i++) + __wt_free(NULL, modarray[i].data.data); + __wt_free(NULL, modarray); +} + /* 64 bit typemaps. 
*/ %typemap(in) uint64_t { $1 = PyLong_AsUnsignedLongLong($input); @@ -244,6 +312,13 @@ static PyObject *wtError; static int sessionFreeHandler(WT_SESSION *session_arg); static int cursorFreeHandler(WT_CURSOR *cursor_arg); + +#define WT_GETATTR(var, parent, name) \ + do if ((var = PyObject_GetAttrString(parent, name)) == NULL) { \ + Py_DECREF(parent); \ + SWIG_exception_fail(SWIG_AttributeError, \ + "Modify." #name " get failed"); \ + } while(0) %} %init %{ @@ -373,8 +448,8 @@ retry: } %enddef -/* Any API that returns an enum type uses this. */ -%define ENUM_OK(m) +/* An API that returns a value that shouldn't be checked uses this. */ +%define ANY_OK(m) %exception m { $action } @@ -408,12 +483,14 @@ retry: %enddef EBUSY_OK(__wt_connection::async_new_op) -ENUM_OK(__wt_async_op::get_type) +ANY_OK(__wt_async_op::get_type) NOTFOUND_OK(__wt_cursor::next) NOTFOUND_OK(__wt_cursor::prev) NOTFOUND_OK(__wt_cursor::remove) NOTFOUND_OK(__wt_cursor::search) NOTFOUND_OK(__wt_cursor::update) +ANY_OK(__wt_modify::__wt_modify) +ANY_OK(__wt_modify::~__wt_modify) COMPARE_OK(__wt_cursor::_compare) COMPARE_OK(__wt_cursor::_equals) @@ -448,6 +525,11 @@ COMPARE_NOTFOUND_OK(__wt_cursor::_search_near) %ignore __wt_cursor::get_value; %ignore __wt_cursor::set_key; %ignore __wt_cursor::set_value; +%ignore __wt_cursor::modify(WT_CURSOR *, WT_MODIFY *, int); +%rename (modify) __wt_cursor::_modify; +%ignore __wt_modify::data; +%ignore __wt_modify::offset; +%ignore __wt_modify::size; /* Next, override methods that return integers via arguments. */ %ignore __wt_cursor::compare(WT_CURSOR *, WT_CURSOR *, int *); @@ -772,6 +854,15 @@ typedef int int_void; return (cursorFreeHandler($self)); } + /* + * modify: the size of the array was put into the first element by the + * typemap. 
+ */ + int _modify(WT_MODIFY *list) { + int count = (int)list[0].size; + return (self->modify(self, &list[1], count)); + } + %pythoncode %{ def get_key(self): '''get_key(self) -> object @@ -870,6 +961,21 @@ typedef int int_void; %} }; +/* + * Support for WT_CURSOR.modify. The WT_MODIFY object is known to + * SWIG, but its attributes are regular Python attributes. + * We extract the attributes at the call site to WT_CURSOR.modify + * so we don't have to deal with managing Python objects references. + */ +%extend __wt_modify { +%pythoncode %{ + def __init__(self, data = '', offset = 0, size = 0): + self.data = data + self.offset = offset + self.size = size +%} +}; + %extend __wt_session { int _log_printf(const char *msg) { return self->log_printf(self, "%s", msg); @@ -951,6 +1057,7 @@ OVERRIDE_METHOD(__wt_session, WT_SESSION, log_printf, (self, msg)) %rename(AsyncOp) __wt_async_op; %rename(Cursor) __wt_cursor; +%rename(Modify) __wt_modify; %rename(Session) __wt_session; %rename(Connection) __wt_connection; @@ -974,7 +1081,7 @@ writeToPythonStream(const char *streamname, const char *message) written = NULL; arglist = arglist2 = NULL; msglen = strlen(message); - msg = malloc(msglen + 2); + WT_RET(__wt_malloc(NULL, msglen + 2, &msg)); strcpy(msg, message); strcpy(&msg[msglen], "\n"); @@ -1010,8 +1117,7 @@ err: Py_XDECREF(arglist2); /* Release python Global Interpreter Lock */ SWIG_PYTHON_THREAD_END_BLOCK; - if (msg) - free(msg); + __wt_free(NULL, msg); return (ret); } @@ -1232,4 +1338,3 @@ _rename_with_prefix('WT_STAT_CONN_', stat.conn) _rename_with_prefix('WT_STAT_DSRC_', stat.dsrc) del _rename_with_prefix %} - diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c index 091b9345713..7b92a58991d 100644 --- a/src/btree/bt_curnext.c +++ b/src/btree/bt_curnext.c @@ -142,7 +142,7 @@ new_page: if (cbt->ins == NULL) __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins)); if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL) continue; - if 
(WT_UPDATE_DELETED_ISSET(upd)) { + if (upd->type == WT_UPDATE_DELETED) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -205,7 +205,7 @@ new_page: /* Find the matching WT_COL slot. */ upd = cbt->ins == NULL ? NULL : __wt_txn_read(session, cbt->ins->upd); if (upd != NULL) { - if (WT_UPDATE_DELETED_ISSET(upd)) { + if (upd->type == WT_UPDATE_DELETED) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -325,7 +325,7 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage) new_insert: if ((ins = cbt->ins) != NULL) { if ((upd = __wt_txn_read(session, ins->upd)) == NULL) continue; - if (WT_UPDATE_DELETED_ISSET(upd)) { + if (upd->type == WT_UPDATE_DELETED) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -358,7 +358,7 @@ new_insert: if ((ins = cbt->ins) != NULL) { cbt->slot = cbt->row_iteration_slot / 2 - 1; rip = &page->pg_row[cbt->slot]; upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip)); - if (upd != NULL && WT_UPDATE_DELETED_ISSET(upd)) { + if (upd != NULL && upd->type == WT_UPDATE_DELETED) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c index 4f0fa77d3e6..55b5095fe91 100644 --- a/src/btree/bt_curprev.c +++ b/src/btree/bt_curprev.c @@ -288,7 +288,7 @@ new_page: if (cbt->ins == NULL) __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins)); if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL) continue; - if (WT_UPDATE_DELETED_ISSET(upd)) { + if (upd->type == WT_UPDATE_DELETED) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -352,7 +352,7 @@ new_page: if (cbt->recno < cbt->ref->ref_recno) upd = cbt->ins == NULL ? 
NULL : __wt_txn_read(session, cbt->ins->upd); if (upd != NULL) { - if (WT_UPDATE_DELETED_ISSET(upd)) { + if (upd->type == WT_UPDATE_DELETED) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -482,7 +482,7 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage) new_insert: if ((ins = cbt->ins) != NULL) { if ((upd = __wt_txn_read(session, ins->upd)) == NULL) continue; - if (WT_UPDATE_DELETED_ISSET(upd)) { + if (upd->type == WT_UPDATE_DELETED) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -517,7 +517,7 @@ new_insert: if ((ins = cbt->ins) != NULL) { cbt->slot = cbt->row_iteration_slot / 2 - 1; rip = &page->pg_row[cbt->slot]; upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip)); - if (upd != NULL && WT_UPDATE_DELETED_ISSET(upd)) { + if (upd != NULL && upd->type == WT_UPDATE_DELETED) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 664545ee3a0..7e415150cc5 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -224,7 +224,7 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) */ if (cbt->ins != NULL && (upd = __wt_txn_read(session, cbt->ins->upd)) != NULL) { - if (WT_UPDATE_DELETED_ISSET(upd)) + if (upd->type == WT_UPDATE_DELETED) return (false); if (updp != NULL) *updp = upd; @@ -297,7 +297,7 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) page->modify->mod_row_update != NULL && (upd = __wt_txn_read(session, page->modify->mod_row_update[cbt->slot])) != NULL) { - if (WT_UPDATE_DELETED_ISSET(upd)) + if (upd->type == WT_UPDATE_DELETED) return (false); if (updp != NULL) *updp = upd; @@ -342,11 +342,11 @@ __cursor_row_search( * Column-store delete, insert, and update from an application cursor. 
*/ static inline int -__cursor_col_modify(WT_SESSION_IMPL *session, - WT_CURSOR_BTREE *cbt, bool is_remove, bool is_reserve) +__cursor_col_modify( + WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type) { return (__wt_col_modify(session, cbt, - cbt->iface.recno, &cbt->iface.value, NULL, is_remove, is_reserve)); + cbt->iface.recno, &cbt->iface.value, NULL, modify_type)); } /* @@ -354,11 +354,11 @@ __cursor_col_modify(WT_SESSION_IMPL *session, * Row-store insert, update and delete from an application cursor. */ static inline int -__cursor_row_modify(WT_SESSION_IMPL *session, - WT_CURSOR_BTREE *cbt, bool is_remove, bool is_reserve) +__cursor_row_modify( + WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type) { return (__wt_row_modify(session, cbt, - &cbt->iface.key, &cbt->iface.value, NULL, is_remove, is_reserve)); + &cbt->iface.key, &cbt->iface.value, NULL, modify_type)); } /* @@ -662,8 +662,8 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) */ cbt->compare = 0; ret = btree->type == BTREE_ROW ? 
- __cursor_row_modify(session, cbt, false, false) : - __cursor_col_modify(session, cbt, false, false); + __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD) : + __cursor_col_modify(session, cbt, WT_UPDATE_STANDARD); if (ret == 0) goto done; @@ -700,7 +700,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) WT_ERR(WT_DUPLICATE_KEY); - ret = __cursor_row_modify(session, cbt, false, false); + ret = __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD); } else { /* * Optionally insert a new record (ignoring the application's @@ -723,7 +723,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); (cbt->compare != 0 && __cursor_fix_implicit(btree, cbt)))) WT_ERR(WT_DUPLICATE_KEY); - WT_ERR(__cursor_col_modify(session, cbt, false, false)); + WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD)); if (append_key) cbt->iface.recno = cbt->recno; @@ -881,8 +881,8 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt) */ cbt->compare = 0; ret = btree->type == BTREE_ROW ? - __cursor_row_modify(session, cbt, true, false) : - __cursor_col_modify(session, cbt, true, false); + __cursor_row_modify(session, cbt, WT_UPDATE_DELETED) : + __cursor_col_modify(session, cbt, WT_UPDATE_DELETED); if (ret == 0) goto done; @@ -921,7 +921,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) WT_ERR(WT_NOTFOUND); - ret = __cursor_row_modify(session, cbt, true, false); + ret = __cursor_row_modify(session, cbt, WT_UPDATE_DELETED); } else { WT_ERR(__cursor_col_search(session, cbt, NULL)); @@ -948,7 +948,8 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); */ cbt->recno = cursor->recno; } else - ret = __cursor_col_modify(session, cbt, true, false); + ret = __cursor_col_modify( + session, cbt, WT_UPDATE_DELETED); } err: if (ret == WT_RESTART) { @@ -986,7 +987,7 @@ done: /* * Update a record in the tree. 
*/ static int -__btcur_update(WT_CURSOR_BTREE *cbt, bool is_reserve) +__btcur_update(WT_CURSOR_BTREE *cbt, u_int modify_type) { WT_BTREE *btree; WT_CURFILE_STATE state; @@ -998,15 +999,6 @@ __btcur_update(WT_CURSOR_BTREE *cbt, bool is_reserve) cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; - WT_STAT_CONN_INCR(session, cursor_update); - WT_STAT_DATA_INCR(session, cursor_update); - WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->value.size); - - if (btree->type == BTREE_ROW) - WT_RET(__cursor_size_chk(session, &cursor->key)); - if (!is_reserve) - WT_RET(__cursor_size_chk(session, &cursor->value)); - /* It's no longer possible to bulk-load into the tree. */ __cursor_disable_bulk(session, btree); @@ -1030,8 +1022,8 @@ __btcur_update(WT_CURSOR_BTREE *cbt, bool is_reserve) */ cbt->compare = 0; ret = btree->type == BTREE_ROW ? - __cursor_row_modify(session, cbt, false, is_reserve) : - __cursor_col_modify(session, cbt, false, is_reserve); + __cursor_row_modify(session, cbt, modify_type) : + __cursor_col_modify(session, cbt, modify_type); if (ret == 0) goto done; @@ -1069,7 +1061,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) WT_ERR(WT_NOTFOUND); } - ret = __cursor_row_modify(session, cbt, false, is_reserve); + ret = __cursor_row_modify(session, cbt, modify_type); } else { WT_ERR(__cursor_col_search(session, cbt, NULL)); @@ -1088,7 +1080,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); !__cursor_fix_implicit(btree, cbt)) WT_ERR(WT_NOTFOUND); } - ret = __cursor_col_modify(session, cbt, false, is_reserve); + ret = __cursor_col_modify(session, cbt, modify_type); } err: if (ret == WT_RESTART) { @@ -1106,7 +1098,7 @@ err: if (ret == WT_RESTART) { * pointer to the modify function's allocated update structure. 
*/ done: if (ret == 0) { - if (is_reserve) { + if (modify_type == WT_UPDATE_RESERVED) { F_CLR(cursor, WT_CURSTD_VALUE_SET); WT_TRET(__wt_key_return(session, cbt)); } else @@ -1131,14 +1123,19 @@ __wt_btcur_reserve(WT_CURSOR_BTREE *cbt) { WT_CURSOR *cursor; WT_DECL_RET; + WT_SESSION_IMPL *session; bool overwrite; cursor = &cbt->iface; + session = (WT_SESSION_IMPL *)cursor->session; + + WT_STAT_CONN_INCR(session, cursor_reserve); + WT_STAT_DATA_INCR(session, cursor_reserve); /* WT_CURSOR.reserve is update-without-overwrite and a special value. */ overwrite = F_ISSET(cursor, WT_CURSTD_OVERWRITE); F_CLR(cursor, WT_CURSTD_OVERWRITE); - ret = __btcur_update(cbt, true); + ret = __btcur_update(cbt, WT_UPDATE_RESERVED); if (overwrite) F_SET(cursor, WT_CURSTD_OVERWRITE); return (ret); @@ -1151,7 +1148,23 @@ __wt_btcur_reserve(WT_CURSOR_BTREE *cbt) int __wt_btcur_update(WT_CURSOR_BTREE *cbt) { - return (__btcur_update(cbt, false)); + WT_BTREE *btree; + WT_CURSOR *cursor; + WT_SESSION_IMPL *session; + + btree = cbt->btree; + cursor = &cbt->iface; + session = (WT_SESSION_IMPL *)cursor->session; + + WT_STAT_CONN_INCR(session, cursor_update); + WT_STAT_DATA_INCR(session, cursor_update); + WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->value.size); + + if (btree->type == BTREE_ROW) + WT_RET(__cursor_size_chk(session, &cursor->key)); + WT_RET(__cursor_size_chk(session, &cursor->value)); + + return (__btcur_update(cbt, WT_UPDATE_STANDARD)); } /* @@ -1274,7 +1287,7 @@ __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp) static int __cursor_truncate(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, - int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, bool, bool)) + int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int)) { WT_DECL_RET; @@ -1302,7 +1315,7 @@ retry: WT_RET(__wt_btcur_search(start)); F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); for (;;) { - if ((ret = rmfunc(session, start, true, 
false)) != 0) + if ((ret = rmfunc(session, start, WT_UPDATE_DELETED)) != 0) break; if (stop != NULL && __cursor_equals(start, stop)) @@ -1329,7 +1342,7 @@ retry: WT_RET(__wt_btcur_search(start)); static int __cursor_truncate_fix(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, - int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, bool, bool)) + int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int)) { WT_DECL_RET; const uint8_t *value; @@ -1360,7 +1373,7 @@ retry: WT_RET(__wt_btcur_search(start)); for (;;) { value = (const uint8_t *)start->iface.value.data; if (*value != 0 && - (ret = rmfunc(session, start, true, false)) != 0) + (ret = rmfunc(session, start, WT_UPDATE_DELETED)) != 0) break; if (stop != NULL && __cursor_equals(start, stop)) diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index 538c363a864..c3f98a98ec5 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -985,9 +985,9 @@ static int __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte) { for (; upd != NULL; upd = upd->next) - if (WT_UPDATE_DELETED_ISSET(upd)) + if (upd->type == WT_UPDATE_DELETED) WT_RET(ds->f(ds, "\tvalue {deleted}\n")); - else if (WT_UPDATE_RESERVED_ISSET(upd)) + else if (upd->type == WT_UPDATE_RESERVED) WT_RET(ds->f(ds, "\tvalue {reserved}\n")); else if (hexbyte) { WT_RET(ds->f(ds, "\t{")); diff --git a/src/btree/bt_delete.c b/src/btree/bt_delete.c index 12c3b044fda..4a88b672d47 100644 --- a/src/btree/bt_delete.c +++ b/src/btree/bt_delete.c @@ -333,7 +333,7 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) */ for (i = 0, size = 0; i < page->entries; ++i) { WT_ERR(__wt_calloc_one(session, &upd)); - WT_UPDATE_DELETED_SET(upd); + upd->type = WT_UPDATE_DELETED; if (page_del == NULL) upd->txnid = WT_TXN_NONE; /* Globally visible */ diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index 72a69e8591c..e6a0f53ab40 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -90,7 +90,8 @@ 
__col_instantiate(WT_SESSION_IMPL *session, { /* Search the page and add updates. */ WT_RET(__wt_col_search(session, recno, ref, cbt)); - WT_RET(__wt_col_modify(session, cbt, recno, NULL, upd, false, false)); + WT_RET(__wt_col_modify( + session, cbt, recno, NULL, upd, WT_UPDATE_STANDARD)); return (0); } @@ -104,7 +105,8 @@ __row_instantiate(WT_SESSION_IMPL *session, { /* Search the page and add updates. */ WT_RET(__wt_row_search(session, key, ref, cbt, true)); - WT_RET(__wt_row_modify(session, cbt, key, NULL, upd, false, false)); + WT_RET(__wt_row_modify( + session, cbt, key, NULL, upd, WT_UPDATE_STANDARD)); return (0); } @@ -127,7 +129,8 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_UPDATE *first_upd, *last_upd, *upd; size_t incr, total_incr; uint64_t current_recno, las_counter, las_txnid, recno, upd_txnid; - uint32_t las_id, upd_size, session_flags; + uint32_t las_id, session_flags; + uint8_t upd_type; int exact; const uint8_t *p; @@ -188,9 +191,10 @@ __las_page_instantiate(WT_SESSION_IMPL *session, /* Allocate the WT_UPDATE structure. */ WT_ERR(cursor->get_value( - cursor, &upd_txnid, &upd_size, las_value)); - WT_ERR(__wt_update_alloc(session, las_value, - &upd, &incr, upd_size == WT_UPDATE_DELETED_VALUE, false)); + cursor, &upd_txnid, &upd_type, las_value)); + WT_ERR(__wt_update_alloc(session, las_value, &upd, &incr, + upd_type == WT_UPDATE_DELETED ? + WT_UPDATE_DELETED : WT_UPDATE_STANDARD)); total_incr += incr; upd->txnid = upd_txnid; diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 23210a556da..c2c56a18131 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -1424,8 +1424,8 @@ __split_multi_inmem( WT_ERR(__wt_col_search(session, recno, ref, &cbt)); /* Apply the modification. */ - WT_ERR(__wt_col_modify( - session, &cbt, recno, NULL, upd, false, false)); + WT_ERR(__wt_col_modify(session, + &cbt, recno, NULL, upd, WT_UPDATE_STANDARD)); break; case WT_PAGE_ROW_LEAF: /* Build a key. 
*/ @@ -1447,7 +1447,7 @@ __split_multi_inmem( /* Apply the modification. */ WT_ERR(__wt_row_modify( - session, &cbt, key, NULL, upd, false, false)); + session, &cbt, key, NULL, upd, WT_UPDATE_STANDARD)); break; WT_ILLEGAL_VALUE_ERR(session); } diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c index 2b9c9bef8a2..e3b9bbced48 100644 --- a/src/btree/bt_stat.c +++ b/src/btree/bt_stat.c @@ -178,9 +178,9 @@ __stat_page_col_var( */ WT_SKIP_FOREACH(ins, WT_COL_UPDATE(page, cip)) { upd = ins->upd; - if (WT_UPDATE_RESERVED_ISSET(upd)) + if (upd->type == WT_UPDATE_RESERVED) continue; - if (WT_UPDATE_DELETED_ISSET(upd)) { + if (upd->type == WT_UPDATE_DELETED) { if (!orig_deleted) { ++deleted_cnt; --entry_cnt; @@ -195,9 +195,9 @@ __stat_page_col_var( /* Walk any append list. */ WT_SKIP_FOREACH(ins, WT_COL_APPEND(page)) { - if (WT_UPDATE_RESERVED_ISSET(ins->upd)) + if (ins->upd->type == WT_UPDATE_RESERVED) continue; - if (WT_UPDATE_DELETED_ISSET(ins->upd)) + if (ins->upd->type == WT_UPDATE_DELETED) ++deleted_cnt; else ++entry_cnt; @@ -268,8 +268,8 @@ __stat_page_row_leaf( * key on the page. */ WT_SKIP_FOREACH(ins, WT_ROW_INSERT_SMALLEST(page)) - if (!WT_UPDATE_DELETED_ISSET(ins->upd) && - !WT_UPDATE_RESERVED_ISSET(ins->upd)) + if (ins->upd->type != WT_UPDATE_DELETED && + ins->upd->type != WT_UPDATE_RESERVED) ++entry_cnt; /* @@ -279,8 +279,8 @@ __stat_page_row_leaf( WT_ROW_FOREACH(page, rip, i) { upd = WT_ROW_UPDATE(page, rip); if (upd == NULL || - (!WT_UPDATE_DELETED_ISSET(upd) && - !WT_UPDATE_RESERVED_ISSET(upd))) + (upd->type != WT_UPDATE_DELETED && + upd->type != WT_UPDATE_RESERVED)) ++entry_cnt; if (upd == NULL && (cell = __wt_row_leaf_value_cell(page, rip, NULL)) != NULL && @@ -289,8 +289,8 @@ __stat_page_row_leaf( /* Walk K/V pairs inserted after the on-page K/V pair. 
*/ WT_SKIP_FOREACH(ins, WT_ROW_INSERT(page, rip)) - if (!WT_UPDATE_DELETED_ISSET(ins->upd) && - !WT_UPDATE_RESERVED_ISSET(ins->upd)) + if (ins->upd->type != WT_UPDATE_DELETED && + ins->upd->type != WT_UPDATE_RESERVED) ++entry_cnt; } diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c index b45f369f1c2..c256f03a612 100644 --- a/src/btree/col_modify.c +++ b/src/btree/col_modify.c @@ -17,8 +17,7 @@ static int __col_insert_alloc( */ int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, - uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, - bool is_remove, bool is_reserve) + uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type) { static const WT_ITEM col_fix_remove = { "", 1, NULL, 0, 0 }; WT_BTREE *btree; @@ -38,13 +37,15 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, upd = upd_arg; append = logged = false; - if (is_remove || is_reserve) { + if (modify_type == WT_UPDATE_DELETED || + modify_type == WT_UPDATE_RESERVED) { /* * Fixed-size column-store doesn't have on-page deleted values, * it's a nul byte. */ - if (is_remove && btree->type == BTREE_COL_FIX) { - is_remove = false; + if (modify_type == WT_UPDATE_DELETED && + btree->type == BTREE_COL_FIX) { + modify_type = WT_UPDATE_STANDARD; value = &col_fix_remove; } } else { @@ -89,7 +90,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, /* Allocate a WT_UPDATE structure and transaction ID. 
*/ WT_ERR(__wt_update_alloc(session, - value, &upd, &upd_size, is_remove, is_reserve)); + value, &upd, &upd_size, modify_type)); WT_ERR(__wt_txn_modify(session, upd)); logged = true; @@ -150,7 +151,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, if (upd_arg == NULL) { WT_ERR(__wt_update_alloc(session, - value, &upd, &upd_size, is_remove, is_reserve)); + value, &upd, &upd_size, modify_type)); WT_ERR(__wt_txn_modify(session, upd)); logged = true; @@ -195,7 +196,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, } /* If the update was successful, add it to the in-memory log. */ - if (logged && !is_reserve) + if (logged && modify_type != WT_UPDATE_RESERVED) WT_ERR(__wt_txn_log_op(session, cbt)); if (0) { diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c index d3b087f92c6..2bf3c2f29bc 100644 --- a/src/btree/row_modify.c +++ b/src/btree/row_modify.c @@ -48,7 +48,7 @@ __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, - WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) + WT_UPDATE *upd_arg, u_int modify_type) { WT_DECL_RET; WT_INSERT *ins; @@ -97,7 +97,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, /* Allocate a WT_UPDATE structure and transaction ID. 
*/ WT_ERR(__wt_update_alloc(session, - value, &upd, &upd_size, is_remove, is_reserve)); + value, &upd, &upd_size, modify_type)); WT_ERR(__wt_txn_modify(session, upd)); logged = true; @@ -168,7 +168,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, if (upd_arg == NULL) { WT_ERR(__wt_update_alloc(session, - value, &upd, &upd_size, is_remove, is_reserve)); + value, &upd, &upd_size, modify_type)); WT_ERR(__wt_txn_modify(session, upd)); logged = true; @@ -207,7 +207,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, &ins, ins_size, skipdepth)); } - if (logged && !is_reserve) + if (logged && modify_type != WT_UPDATE_RESERVED) WT_ERR(__wt_txn_log_op(session, cbt)); if (0) { @@ -261,7 +261,7 @@ __wt_row_insert_alloc(WT_SESSION_IMPL *session, */ int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, - WT_UPDATE **updp, size_t *sizep, bool is_remove, bool is_reserve) + WT_UPDATE **updp, size_t *sizep, u_int modify_type) { WT_UPDATE *upd; @@ -271,13 +271,10 @@ __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, * Allocate the WT_UPDATE structure and room for the value, then copy * the value into place. 
*/ - if (is_remove || is_reserve) { + if (modify_type == WT_UPDATE_DELETED || + modify_type == WT_UPDATE_RESERVED) WT_RET(__wt_calloc(session, 1, sizeof(WT_UPDATE), &upd)); - if (is_remove) - WT_UPDATE_DELETED_SET(upd); - if (is_reserve) - WT_UPDATE_RESERVED_SET(upd); - } else { + else { WT_RET(__wt_calloc( session, 1, sizeof(WT_UPDATE) + value->size, &upd)); if (value->size != 0) { @@ -285,6 +282,7 @@ __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, memcpy(WT_UPDATE_DATA(upd), value->data, value->size); } } + upd->type = (uint8_t)modify_type; *updp = upd; *sizep = WT_UPDATE_MEMSIZE(upd); diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c index a30cb6f0e17..60750b88900 100644 --- a/src/cursor/cur_backup.c +++ b/src/cursor/cur_backup.c @@ -119,6 +119,7 @@ __wt_curbackup_open(WT_SESSION_IMPL *session, __wt_cursor_notsup, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __wt_cursor_notsup, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_config.c b/src/cursor/cur_config.c index a0b87b2b3c6..6c198315e33 100644 --- a/src/cursor/cur_config.c +++ b/src/cursor/cur_config.c @@ -39,6 +39,7 @@ __wt_curconfig_open(WT_SESSION_IMPL *session, __wt_cursor_notsup, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __wt_cursor_notsup, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_ds.c b/src/cursor/cur_ds.c index e40cb30dd53..4930bcdda13 100644 --- a/src/cursor/cur_ds.c +++ b/src/cursor/cur_ds.c @@ -458,6 +458,7 @@ __wt_curds_open( __curds_search, /* search */ __curds_search_near, /* search-near */ __curds_insert, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __curds_update, /* update */ __curds_remove, /* remove */ __wt_cursor_notsup, /* reserve */ diff 
--git a/src/cursor/cur_dump.c b/src/cursor/cur_dump.c index 73328da6246..3e90d321db6 100644 --- a/src/cursor/cur_dump.c +++ b/src/cursor/cur_dump.c @@ -369,6 +369,7 @@ __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) __curdump_search, /* search */ __curdump_search_near, /* search-near */ __curdump_insert, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __curdump_update, /* update */ __curdump_remove, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index c43826799cf..d6cf308077d 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -352,6 +352,11 @@ __curfile_reserve(WT_CURSOR *cursor) WT_ERR(__wt_btcur_reserve(cbt)); + /* + * Reserve maintains a position and key, which doesn't match the library + * API, where reserve maintains a value. Fix the API by searching after + * each successful reserve operation. + */ WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0); @@ -430,6 +435,7 @@ __curfile_create(WT_SESSION_IMPL *session, __curfile_search, /* search */ __curfile_search_near, /* search-near */ __curfile_insert, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __curfile_update, /* update */ __curfile_remove, /* remove */ __curfile_reserve, /* reserve */ diff --git a/src/cursor/cur_index.c b/src/cursor/cur_index.c index fcf00e4fa03..e8fcb1b2702 100644 --- a/src/cursor/cur_index.c +++ b/src/cursor/cur_index.c @@ -449,6 +449,7 @@ __wt_curindex_open(WT_SESSION_IMPL *session, __curindex_search, /* search */ __curindex_search_near, /* search-near */ __wt_cursor_notsup, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index cebf8a7fd6e..e4ccb90139e 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ 
-591,6 +591,7 @@ __curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, __wt_cursor_notsup, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __curjoin_extract_insert, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ @@ -1293,6 +1294,7 @@ __wt_curjoin_open(WT_SESSION_IMPL *session, __wt_cursor_notsup, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __wt_cursor_notsup, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_log.c b/src/cursor/cur_log.c index c8dc44bb392..38e9d4a1784 100644 --- a/src/cursor/cur_log.c +++ b/src/cursor/cur_log.c @@ -342,6 +342,7 @@ __wt_curlog_open(WT_SESSION_IMPL *session, __curlog_search, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __wt_cursor_notsup, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_metadata.c b/src/cursor/cur_metadata.c index 9a38996d4ce..d9aeed1fccd 100644 --- a/src/cursor/cur_metadata.c +++ b/src/cursor/cur_metadata.c @@ -550,6 +550,7 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session, __curmetadata_search, /* search */ __curmetadata_search_near, /* search-near */ __curmetadata_insert, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __curmetadata_update, /* update */ __curmetadata_remove, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c index 0bfe5679677..a1ec1d75918 100644 --- a/src/cursor/cur_stat.c +++ b/src/cursor/cur_stat.c @@ -576,6 +576,7 @@ __wt_curstat_open(WT_SESSION_IMPL *session, __curstat_search, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __wt_cursor_notsup, /* insert */ + 
__wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c index e42c5c7766e..f52d60fde01 100644 --- a/src/cursor/cur_std.c +++ b/src/cursor/cur_std.c @@ -89,6 +89,19 @@ __wt_cursor_equals_notsup(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) return (__wt_cursor_notsup(cursor)); } +/* + * __wt_cursor_modify_notsup -- + * Unsupported cursor modify. + */ +int +__wt_cursor_modify_notsup(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) +{ + WT_UNUSED(entries); + WT_UNUSED(nentries); + + return (__wt_cursor_notsup(cursor)); +} + /* * __wt_cursor_search_near_notsup -- * Unsupported cursor search-near. @@ -581,6 +594,100 @@ err: API_END(session, ret); return (ret); } +/* + * __cursor_modify -- + * WT_CURSOR->modify default implementation. + */ +static int +__cursor_modify(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_DECL_ITEM(ta); + WT_DECL_ITEM(tb); + WT_DECL_ITEM(tmp); + size_t len, size; + int i; + + CURSOR_UPDATE_API_CALL(cursor, session, modify, NULL); + WT_ERR(__cursor_checkkey(cursor)); + + /* Check for a rational modify vector count. */ + if (nentries <= 0) + WT_ERR_MSG( + session, EINVAL, "Illegal modify vector of %d", nentries); + + WT_STAT_CONN_INCR(session, cursor_modify); + WT_STAT_DATA_INCR(session, cursor_modify); + + /* Acquire position and value. */ + WT_ERR(cursor->search(cursor)); + + /* + * Process the entries to figure out how large a buffer we need. This is + * a bit pessimistic because we're ignoring replacement bytes, but it's + * a simpler calculation. + */ + for (size = cursor->value.size, i = 0; i < nentries; ++i) { + if (entries[i].offset >= size) + size = entries[i].offset; + size += entries[i].data.size; + } + + /* Allocate a pair of buffers. 
*/ + WT_ERR(__wt_scr_alloc(session, size, &ta)); + WT_ERR(__wt_scr_alloc(session, size, &tb)); + + /* Apply the change vector to the value. */ + WT_ERR(__wt_buf_set( + session, ta, cursor->value.data, cursor->value.size)); + for (i = 0; i < nentries; ++i) { + /* Take leading bytes from the original, plus any gap bytes. */ + if (entries[i].offset >= ta->size) { + memcpy(tb->mem, ta->mem, ta->size); + if (entries[i].offset > ta->size) + memset((uint8_t *)tb->mem + ta->size, + '\0', entries[i].offset - ta->size); + } else + if (entries[i].offset > 0) + memcpy(tb->mem, ta->mem, entries[i].offset); + tb->size = entries[i].offset; + + /* Take replacement bytes. */ + if (entries[i].data.size > 0) { + memcpy((uint8_t *)tb->mem + tb->size, + entries[i].data.data, entries[i].data.size); + tb->size += entries[i].data.size; + } + + /* Take trailing bytes from the original. */ + len = entries[i].offset + entries[i].size; + if (ta->size > len) { + memcpy((uint8_t *)tb->mem + tb->size, + (uint8_t *)ta->mem + len, ta->size - len); + tb->size += ta->size - len; + } + WT_ASSERT(session, tb->size <= size); + + tmp = ta; + ta = tb; + tb = tmp; + } + + /* Set the cursor's value. */ + ta->data = ta->mem; + cursor->set_value(cursor, ta); + + /* We know both key and value are set, "overwrite" doesn't matter. */ + ret = cursor->update(cursor); + +err: __wt_scr_free(session, &ta); + __wt_scr_free(session, &tb); + + CURSOR_UPDATE_API_END(session, ret); + return (ret); +} + /* * __wt_cursor_reconfigure -- * Set runtime-configurable settings. @@ -756,6 +863,14 @@ __wt_cursor_init(WT_CURSOR *cursor, if (cval.val != 0) F_SET(cursor, WT_CURSTD_RAW); + /* + * WT_CURSOR.modify supported on 'u' value formats, but may have been + * already initialized. 
+ */ + if (WT_STREQ(cursor->value_format, "u") && + cursor->modify == __wt_cursor_modify_notsup) + cursor->modify = __cursor_modify; + /* * Cursors that are internal to some other cursor (such as file cursors * inside a table cursor) should be closed after the containing cursor. diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c index c6514aaac58..7a04033e9a4 100644 --- a/src/cursor/cur_table.c +++ b/src/cursor/cur_table.c @@ -91,6 +91,7 @@ __wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx, __wt_cursor_notsup, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __curextract_insert, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ @@ -949,6 +950,7 @@ __wt_curtable_open(WT_SESSION_IMPL *session, __curtable_search, /* search */ __curtable_search_near, /* search-near */ __curtable_insert, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __curtable_update, /* update */ __curtable_remove, /* remove */ __curtable_reserve, /* reserve */ diff --git a/src/docs/Doxyfile b/src/docs/Doxyfile index 3d8c46962f1..e7382e2bc5e 100644 --- a/src/docs/Doxyfile +++ b/src/docs/Doxyfile @@ -1582,6 +1582,7 @@ PREDEFINED = DOXYGEN \ __wt_file_system:=WT_FILE_SYSTEM \ __wt_item:=WT_ITEM \ __wt_lsn:=WT_LSN \ + __wt_modify:=WT_MODIFY \ __wt_session:=WT_SESSION \ __wt_txn_notify:=WT_TXN_NOTIFY \ WT_HANDLE_CLOSED(x):=x \ diff --git a/src/include/btmem.h b/src/include/btmem.h index 6755db81007..4e8d3c05d7d 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -208,7 +208,7 @@ struct __wt_ovfl_txnc { */ #define WT_LAS_FORMAT \ "key_format=" WT_UNCHECKED_STRING(IuQQu) \ - ",value_format=" WT_UNCHECKED_STRING(QIu) + ",value_format=" WT_UNCHECKED_STRING(QBu) /* * WT_PAGE_MODIFY -- @@ -809,11 +809,11 @@ struct __wt_row { /* On-page key, on-page cell, or off-page WT_IKEY */ * Walk the entries of an in-memory row-store leaf page. 
*/ #define WT_ROW_FOREACH(page, rip, i) \ - for ((i) = (page)->entries, \ + for ((i) = (page)->entries, \ (rip) = (page)->pg_row; (i) > 0; ++(rip), --(i)) #define WT_ROW_FOREACH_REVERSE(page, rip, i) \ - for ((i) = (page)->entries, \ - (rip) = (page)->pg_row + ((page)->entries - 1); \ + for ((i) = (page)->entries, \ + (rip) = (page)->pg_row + ((page)->entries - 1); \ (i) > 0; --(rip), --(i)) /* @@ -861,7 +861,7 @@ struct __wt_col { * Walk the entries of variable-length column-store leaf page. */ #define WT_COL_FOREACH(page, cip, i) \ - for ((i) = (page)->entries, \ + for ((i) = (page)->entries, \ (cip) = (page)->pg_var; (i) > 0; ++(cip), --(i)) /* @@ -908,23 +908,16 @@ struct __wt_ikey { * list. */ WT_PACKED_STRUCT_BEGIN(__wt_update) - uint64_t txnid; /* update transaction */ + uint64_t txnid; /* transaction */ WT_UPDATE *next; /* forward-linked list */ - /* - * Use the maximum size and maximum size-1 as is-deleted and is-reserved - * flags (which means we can't store 4GB objects), instead of increasing - * the size of this structure for a flag bit. - */ -#define WT_UPDATE_DELETED_VALUE UINT32_MAX -#define WT_UPDATE_DELETED_SET(u) ((u)->size = WT_UPDATE_DELETED_VALUE) -#define WT_UPDATE_DELETED_ISSET(u) ((u)->size == WT_UPDATE_DELETED_VALUE) + uint32_t size; /* data length */ -#define WT_UPDATE_RESERVED_VALUE (UINT32_MAX - 1) -#define WT_UPDATE_RESERVED_SET(u) ((u)->size = WT_UPDATE_RESERVED_VALUE) -#define WT_UPDATE_RESERVED_ISSET(u) ((u)->size == WT_UPDATE_RESERVED_VALUE) - uint32_t size; /* update length */ +#define WT_UPDATE_STANDARD 0 +#define WT_UPDATE_DELETED 1 +#define WT_UPDATE_RESERVED 2 + uint8_t type; /* type (one byte to conserve memory) */ /* The untyped value immediately follows the WT_UPDATE structure. */ #define WT_UPDATE_DATA(upd) \ @@ -936,9 +929,13 @@ WT_PACKED_STRUCT_BEGIN(__wt_update) * cache overhead calculation. 
*/ #define WT_UPDATE_MEMSIZE(upd) \ - WT_ALIGN(sizeof(WT_UPDATE) + (WT_UPDATE_DELETED_ISSET(upd) || \ - WT_UPDATE_RESERVED_ISSET(upd) ? 0 : (upd)->size), 32) + WT_ALIGN(sizeof(WT_UPDATE) + (upd)->size, 32) WT_PACKED_STRUCT_END +/* + * WT_UPDATE_SIZE is the expected structure size -- we verify the build to + * ensure the compiler hasn't inserted padding. + */ +#define WT_UPDATE_SIZE 21 /* * WT_INSERT -- diff --git a/src/include/cursor.h b/src/include/cursor.h index b044329fbfe..8d2f2c80c2a 100644 --- a/src/include/cursor.h +++ b/src/include/cursor.h @@ -22,6 +22,7 @@ search, \ search_near, \ insert, \ + modify, \ update, \ remove, \ reserve, \ @@ -44,6 +45,7 @@ search, \ search_near, \ insert, \ + modify, \ update, \ remove, \ reserve, \ diff --git a/src/include/extern.h b/src/include/extern.h index a3ce0f3746f..01c21b188c0 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -180,7 +180,7 @@ extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *b extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tree_walk_skip( WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_leaf_key_copy( WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -189,9 +189,9 @@ extern int __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, c extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_ikey(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, const WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp, size_t *sizep, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp, size_t *sizep, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd); extern 
void __wt_update_obsolete_free( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd); extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -314,6 +314,7 @@ extern void __wt_cursor_set_key_notsup(WT_CURSOR *cursor, ...); extern void __wt_cursor_set_value_notsup(WT_CURSOR *cursor, ...); extern int __wt_cursor_compare_notsup(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_cursor_equals_notsup(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_modify_notsup(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_cursor_search_near_notsup(WT_CURSOR *cursor, int *exact) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_cursor_reconfigure_notsup(WT_CURSOR *cursor, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_cursor_set_notsup(WT_CURSOR *cursor); diff --git a/src/include/log.h b/src/include/log.h index f80514a3546..e7bc28cd220 100644 --- a/src/include/log.h +++ b/src/include/log.h @@ -130,7 +130,7 @@ union __wt_lsn { #define WT_LOG_SLOT_FLAGS(state) ((state) & WT_LOG_SLOT_MASK_ON) #define WT_LOG_SLOT_JOINED(state) (((state) & WT_LOG_SLOT_MASK_OFF) >> 32) #define WT_LOG_SLOT_JOINED_BUFFERED(state) \ - (WT_LOG_SLOT_JOINED(state) & \ + (WT_LOG_SLOT_JOINED(state) & \ (WT_LOG_SLOT_UNBUFFERED - 1)) #define WT_LOG_SLOT_JOIN_REL(j, r, s) (((j) << 32) + (r) + (s)) #define WT_LOG_SLOT_RELEASED(state) ((int64_t)(int32_t)(state)) diff --git a/src/include/lsm.h b/src/include/lsm.h index 08313438eb8..f8d0f480cbb 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -240,11 +240,11 @@ struct __wt_lsm_tree { * area, copying them into place when a statistics cursor is created. 
*/ #define WT_LSM_TREE_STAT_INCR(session, fld) do { \ - if (WT_STAT_ENABLED(session)) \ + if (WT_STAT_ENABLED(session)) \ ++(fld); \ } while (0) #define WT_LSM_TREE_STAT_INCRV(session, fld, v) do { \ - if (WT_STAT_ENABLED(session)) \ + if (WT_STAT_ENABLED(session)) \ (fld) += (int64_t)(v); \ } while (0) int64_t bloom_false_positive; diff --git a/src/include/mutex.h b/src/include/mutex.h index c0e25ebb295..00babd47fbf 100644 --- a/src/include/mutex.h +++ b/src/include/mutex.h @@ -66,8 +66,8 @@ struct __wt_spinlock { WT_CACHE_LINE_PAD_BEGIN #if SPINLOCK_TYPE == SPINLOCK_GCC volatile int lock; -#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ - SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE ||\ +#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \ + SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE || \ SPINLOCK_TYPE == SPINLOCK_MSVC wt_mutex_t lock; #else diff --git a/src/include/mutex.i b/src/include/mutex.i index 44b8494cdbf..5b14bb24730 100644 --- a/src/include/mutex.i +++ b/src/include/mutex.i @@ -102,8 +102,8 @@ __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) __sync_lock_release(&t->lock); } -#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ - SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE +#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \ + SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE /* * __wt_spin_init -- @@ -142,8 +142,8 @@ __wt_spin_destroy(WT_SESSION_IMPL *session, WT_SPINLOCK *t) } } -#if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ - SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE +#if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \ + SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE /* * __wt_spin_trylock -- diff --git a/src/include/schema.h b/src/include/schema.h index fa836084834..8b8ee5616d1 100644 --- a/src/include/schema.h +++ b/src/include/schema.h @@ -323,7 +323,7 @@ struct __wt_table { F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ } \ if (__handle_write_locked) { \ - __wt_writelock(session, &__conn->dhandle_lock); \ + 
__wt_writelock(session, &__conn->dhandle_lock); \ F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ } \ } while (0) diff --git a/src/include/stat.h b/src/include/stat.h index beb589dc0ef..fa62cf27693 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -374,9 +374,11 @@ struct __wt_connection_stats { int64_t write_io; int64_t cursor_create; int64_t cursor_insert; + int64_t cursor_modify; int64_t cursor_next; int64_t cursor_prev; int64_t cursor_remove; + int64_t cursor_reserve; int64_t cursor_reset; int64_t cursor_restart; int64_t cursor_search; @@ -609,9 +611,11 @@ struct __wt_dsrc_stats { int64_t cursor_remove_bytes; int64_t cursor_update_bytes; int64_t cursor_insert; + int64_t cursor_modify; int64_t cursor_next; int64_t cursor_prev; int64_t cursor_remove; + int64_t cursor_reserve; int64_t cursor_reset; int64_t cursor_restart; int64_t cursor_search; diff --git a/src/include/txn.i b/src/include/txn.i index 4b6ba17853f..f7321af5b12 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -235,7 +235,7 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd) { /* Skip reserved place-holders, they're never visible. */ for (; upd != NULL; upd = upd->next) - if (!WT_UPDATE_RESERVED_ISSET(upd) && + if (upd->type != WT_UPDATE_RESERVED && __wt_txn_visible(session, upd->txnid)) break; diff --git a/src/include/verify_build.h b/src/include/verify_build.h index d2ccf206990..e93f5931c21 100644 --- a/src/include/verify_build.h +++ b/src/include/verify_build.h @@ -52,6 +52,7 @@ __wt_verify_build(void) /* Check specific structures weren't padded. */ WT_SIZE_CHECK(WT_BLOCK_DESC, WT_BLOCK_DESC_SIZE); WT_SIZE_CHECK(WT_REF, WT_REF_SIZE); + WT_SIZE_CHECK(WT_UPDATE, WT_UPDATE_SIZE); /* Check specific structures were padded. 
*/ #define WT_PADDING_CHECK(s) \ diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index b93fbebef25..5e76b2915b1 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -49,12 +49,6 @@ extern "C" { #define WT_ATTRIBUTE_LIBRARY_VISIBLE __attribute__((visibility("default"))) #endif -#ifdef SWIG -%{ -#include <wiredtiger.h> -%} -#endif - /*! * @defgroup wt WiredTiger API * The functions, handles and methods applications use to access and manage @@ -84,6 +78,7 @@ struct __wt_extractor; typedef struct __wt_extractor WT_EXTRACTOR; struct __wt_file_handle; typedef struct __wt_file_handle WT_FILE_HANDLE; struct __wt_file_system; typedef struct __wt_file_system WT_FILE_SYSTEM; struct __wt_item; typedef struct __wt_item WT_ITEM; +struct __wt_modify; typedef struct __wt_modify WT_MODIFY; struct __wt_session; typedef struct __wt_session WT_SESSION; #if defined(SWIGJAVA) @@ -137,6 +132,43 @@ struct __wt_item { #endif }; +/*! + * A set of modifications for a value, including a pointer to new data and a + * length, plus a target offset in the value and an optional length of data + * in the value to be replaced. + * + * WT_MODIFY structures do not need to be cleared before use. + */ +struct __wt_modify { + /*! + * New data. The size of the new data may be zero when no new data is + * provided. + */ + WT_ITEM data; + + /*! + * The zero-based byte offset in the value where the new data is placed. + * + * If the offset is past the end of the value, nul bytes are appended to + * the value up to the specified offset. + */ + size_t offset; + + /*! + * The number of bytes in the value to be replaced. + * + * If the size is zero, no bytes from the value are replaced and the new + * data is inserted. + * + * If the offset is past the end of the value, the size is ignored. + * + * If the offset plus the size overlaps the end of the previous value, + * bytes from the offset to the end of the value are replaced and any + * remaining new data is appended.
+ */ + size_t size; +}; + /*! * The maximum packed size of a 64-bit integer. The ::wiredtiger_struct_pack * function will pack single long integers into at most this many bytes. @@ -445,6 +477,38 @@ struct __wt_cursor { */ int __F(insert)(WT_CURSOR *cursor); + /*! + * Modify an existing record. + * + * Both the key and value must be set and the record must already exist; + * the record will be updated. + * + * Modification structures are applied in order, and later modifications + * can update earlier modifications. + * + * The modify method is only supported on raw byte arrays accessed using + * a WT_ITEM structure, that is, a format type of \c u. + * + * @snippet ex_all.c Modify an existing record + * + * On success, the cursor ends positioned at the modified record; to + * minimize cursor resources, the WT_CURSOR::reset method should be + * called as soon as the cursor no longer needs that position. + * + * The maximum length of a single column stored in a table is not fixed + * (as it partially depends on the underlying file configuration), but + * is always a small number of bytes less than 4GB. + * + * @param cursor the cursor handle + * @param entries an array of modification data structures + * @param nentries the number of modification data structures + * @errors + * In particular, if \c in_memory is configured for the database and + * the modify requires more than the configured cache size to complete, + * ::WT_CACHE_FULL is returned. + */ + int __F(modify)(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries); + /*! * Update an existing record and optionally insert a record. * @@ -474,7 +538,7 @@ struct __wt_cursor { * @errors * In particular, if \c overwrite=false is configured and no record with * the specified key exists, ::WT_NOTFOUND is returned. 
- * Also, if \c in_memory is configured for the database and the insert + * Also, if \c in_memory is configured for the database and the update * requires more than the configured cache size to complete, * ::WT_CACHE_FULL is returned. */ @@ -4614,292 +4678,296 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CURSOR_CREATE 1115 /*! cursor: cursor insert calls */ #define WT_STAT_CONN_CURSOR_INSERT 1116 +/*! cursor: cursor modify calls */ +#define WT_STAT_CONN_CURSOR_MODIFY 1117 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1117 +#define WT_STAT_CONN_CURSOR_NEXT 1118 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1118 +#define WT_STAT_CONN_CURSOR_PREV 1119 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1119 +#define WT_STAT_CONN_CURSOR_REMOVE 1120 +/*! cursor: cursor reserve calls */ +#define WT_STAT_CONN_CURSOR_RESERVE 1121 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1120 +#define WT_STAT_CONN_CURSOR_RESET 1122 /*! cursor: cursor restarted searches */ -#define WT_STAT_CONN_CURSOR_RESTART 1121 +#define WT_STAT_CONN_CURSOR_RESTART 1123 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1122 +#define WT_STAT_CONN_CURSOR_SEARCH 1124 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1123 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1125 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1124 +#define WT_STAT_CONN_CURSOR_UPDATE 1126 /*! cursor: truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1125 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1127 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1126 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1128 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1127 +#define WT_STAT_CONN_DH_SWEEP_REF 1129 /*! 
data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1128 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1130 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1129 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1131 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1130 +#define WT_STAT_CONN_DH_SWEEP_TOD 1132 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1131 +#define WT_STAT_CONN_DH_SWEEPS 1133 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1132 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1134 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1133 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1135 /*! lock: checkpoint lock acquisitions */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1134 +#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1136 /*! lock: checkpoint lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1135 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1137 /*! lock: checkpoint lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1136 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1138 /*! lock: handle-list lock eviction thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1137 +#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1139 /*! lock: metadata lock acquisitions */ -#define WT_STAT_CONN_LOCK_METADATA_COUNT 1138 +#define WT_STAT_CONN_LOCK_METADATA_COUNT 1140 /*! lock: metadata lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1139 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1141 /*! lock: metadata lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1140 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1142 /*! 
lock: schema lock acquisitions */ -#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1141 +#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1143 /*! lock: schema lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1142 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1144 /*! lock: schema lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1143 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1145 /*! lock: table lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_COUNT 1144 +#define WT_STAT_CONN_LOCK_TABLE_COUNT 1146 /*! * lock: table lock application thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1145 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1147 /*! * lock: table lock internal thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1146 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1148 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1147 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1149 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1148 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1150 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1149 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1151 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1150 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1152 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1151 +#define WT_STAT_CONN_LOG_FLUSH 1153 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1152 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1154 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1153 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1155 /*! 
log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1154 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1156 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1155 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1157 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1156 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1158 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1157 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1159 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1158 +#define WT_STAT_CONN_LOG_SCANS 1160 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1159 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1161 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1160 +#define WT_STAT_CONN_LOG_WRITE_LSN 1162 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1161 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1163 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1162 +#define WT_STAT_CONN_LOG_SYNC 1164 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1163 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1165 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1164 +#define WT_STAT_CONN_LOG_SYNC_DIR 1166 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1165 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1167 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1166 +#define WT_STAT_CONN_LOG_WRITES 1168 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1167 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1169 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1168 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1170 /*! 
log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1169 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1171 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1170 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1172 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1171 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1173 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1172 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1174 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1173 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1175 /*! log: slot close lost race */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1174 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1176 /*! log: slot close unbuffered waits */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1175 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1177 /*! log: slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1176 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1178 /*! log: slot join atomic update races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1177 +#define WT_STAT_CONN_LOG_SLOT_RACES 1179 /*! log: slot join calls atomic updates raced */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1178 +#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1180 /*! log: slot join calls did not yield */ -#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1179 +#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1181 /*! log: slot join calls found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1180 +#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1182 /*! log: slot join calls slept */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1181 +#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1183 /*! log: slot join calls yielded */ -#define WT_STAT_CONN_LOG_SLOT_YIELD 1182 +#define WT_STAT_CONN_LOG_SLOT_YIELD 1184 /*! 
log: slot join found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1183 +#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1185 /*! log: slot joins yield time (usecs) */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1184 +#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1186 /*! log: slot transitions unable to find free slot */ -#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1185 +#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1187 /*! log: slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1186 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1188 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1187 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1189 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1188 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1190 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1189 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1191 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1190 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1192 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1191 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1193 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1192 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1194 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1193 +#define WT_STAT_CONN_REC_PAGES 1195 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1194 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1196 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1195 +#define WT_STAT_CONN_REC_PAGE_DELETE 1197 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1196 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1198 /*! 
reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1197 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1199 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1198 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1200 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1199 +#define WT_STAT_CONN_SESSION_OPEN 1201 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1200 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1202 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1201 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1203 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1202 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1204 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1203 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1205 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1204 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1206 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1205 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1207 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1206 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1208 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1207 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1209 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1208 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1210 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1209 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1211 /*! 
session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1210 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1212 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1211 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1213 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1212 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1214 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1213 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1215 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1214 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1216 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1215 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1217 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1216 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1218 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1217 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1219 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1218 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1220 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1219 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1221 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1220 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1222 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1221 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1223 /*! 
thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1222 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1224 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1223 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1225 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1224 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1226 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1225 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1227 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1226 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1228 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1227 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1229 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1228 +#define WT_STAT_CONN_PAGE_SLEEP 1230 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1229 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1231 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1230 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1232 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1231 +#define WT_STAT_CONN_TXN_BEGIN 1233 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1232 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1234 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1233 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1235 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1234 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1236 /*! 
transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1235 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1237 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1236 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1238 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1237 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1239 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1238 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1240 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1239 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1241 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1240 +#define WT_STAT_CONN_TXN_CHECKPOINT 1242 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1241 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1243 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1242 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1244 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1243 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1245 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1244 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1246 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1245 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1247 /*! 
transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1246 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1248 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1247 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1249 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1248 +#define WT_STAT_CONN_TXN_SYNC 1250 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1249 +#define WT_STAT_CONN_TXN_COMMIT 1251 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1250 +#define WT_STAT_CONN_TXN_ROLLBACK 1252 /*! * @} @@ -5181,61 +5249,65 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2092 /*! cursor: insert calls */ #define WT_STAT_DSRC_CURSOR_INSERT 2093 +/*! cursor: modify calls */ +#define WT_STAT_DSRC_CURSOR_MODIFY 2094 /*! cursor: next calls */ -#define WT_STAT_DSRC_CURSOR_NEXT 2094 +#define WT_STAT_DSRC_CURSOR_NEXT 2095 /*! cursor: prev calls */ -#define WT_STAT_DSRC_CURSOR_PREV 2095 +#define WT_STAT_DSRC_CURSOR_PREV 2096 /*! cursor: remove calls */ -#define WT_STAT_DSRC_CURSOR_REMOVE 2096 +#define WT_STAT_DSRC_CURSOR_REMOVE 2097 +/*! cursor: reserve calls */ +#define WT_STAT_DSRC_CURSOR_RESERVE 2098 /*! cursor: reset calls */ -#define WT_STAT_DSRC_CURSOR_RESET 2097 +#define WT_STAT_DSRC_CURSOR_RESET 2099 /*! cursor: restarted searches */ -#define WT_STAT_DSRC_CURSOR_RESTART 2098 +#define WT_STAT_DSRC_CURSOR_RESTART 2100 /*! cursor: search calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH 2099 +#define WT_STAT_DSRC_CURSOR_SEARCH 2101 /*! cursor: search near calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2100 +#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2102 /*! cursor: truncate calls */ -#define WT_STAT_DSRC_CURSOR_TRUNCATE 2101 +#define WT_STAT_DSRC_CURSOR_TRUNCATE 2103 /*! 
cursor: update calls */ -#define WT_STAT_DSRC_CURSOR_UPDATE 2102 +#define WT_STAT_DSRC_CURSOR_UPDATE 2104 /*! reconciliation: dictionary matches */ -#define WT_STAT_DSRC_REC_DICTIONARY 2103 +#define WT_STAT_DSRC_REC_DICTIONARY 2105 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2104 +#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2106 /*! * reconciliation: internal page key bytes discarded using suffix * compression */ -#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2105 +#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2107 /*! reconciliation: internal page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2106 +#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2108 /*! reconciliation: internal-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2107 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2109 /*! reconciliation: leaf page key bytes discarded using prefix compression */ -#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2108 +#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2110 /*! reconciliation: leaf page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2109 +#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2111 /*! reconciliation: leaf-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2110 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2112 /*! reconciliation: maximum blocks required for a page */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2111 +#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2113 /*! reconciliation: overflow values written */ -#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2112 +#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2114 /*! reconciliation: page checksum matches */ -#define WT_STAT_DSRC_REC_PAGE_MATCH 2113 +#define WT_STAT_DSRC_REC_PAGE_MATCH 2115 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_DSRC_REC_PAGES 2114 +#define WT_STAT_DSRC_REC_PAGES 2116 /*! 
reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_DSRC_REC_PAGES_EVICTION 2115 +#define WT_STAT_DSRC_REC_PAGES_EVICTION 2117 /*! reconciliation: pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE 2116 +#define WT_STAT_DSRC_REC_PAGE_DELETE 2118 /*! session: object compaction */ -#define WT_STAT_DSRC_SESSION_COMPACT 2117 +#define WT_STAT_DSRC_SESSION_COMPACT 2119 /*! session: open cursor count */ -#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2118 +#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2120 /*! transaction: update conflicts */ -#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2119 +#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2121 /*! * @} diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index e62d6cab584..b14e94eb93e 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -1756,6 +1756,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session, __clsm_search, /* search */ __clsm_search_near, /* search-near */ __clsm_insert, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __clsm_update, /* update */ __clsm_remove, /* remove */ __clsm_reserve, /* reserve */ diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 52a279b8c96..8f7769766a9 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -1227,7 +1227,8 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, } /* Reconciliation should never see a reserved update. */ - WT_ASSERT(session, *updp == NULL || !WT_UPDATE_RESERVED_ISSET(*updp)); + WT_ASSERT(session, + *updp == NULL || (*updp)->type != WT_UPDATE_RESERVED); /* * If all of the updates were aborted, quit. This test is not strictly @@ -1411,14 +1412,14 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, * place a deleted record at the end of the update list. 
*/ if (vpack == NULL || vpack->type == WT_CELL_DEL) - WT_RET(__wt_update_alloc( - session, NULL, &append, &notused, true, false)); + WT_RET(__wt_update_alloc(session, + NULL, &append, &notused, WT_UPDATE_DELETED)); else { WT_RET(__wt_scr_alloc(session, 0, &tmp)); if ((ret = __wt_page_cell_data_ref( session, page, vpack, tmp)) == 0) ret = __wt_update_alloc(session, - tmp, &append, &notused, false, false); + tmp, &append, &notused, WT_UPDATE_STANDARD); __wt_scr_free(session, &tmp); WT_RET(ret); } @@ -3675,20 +3676,20 @@ __rec_update_las(WT_SESSION_IMPL *session, * restored, obviously. */ do { - if (WT_UPDATE_RESERVED_ISSET(upd)) + if (upd->type == WT_UPDATE_RESERVED) continue; cursor->set_key(cursor, btree_id, &las_addr, ++las_counter, list->onpage_txn, key); - if (WT_UPDATE_DELETED_ISSET(upd)) + if (upd->type == WT_UPDATE_DELETED) las_value.size = 0; else { las_value.data = WT_UPDATE_DATA(upd); las_value.size = upd->size; } cursor->set_value( - cursor, upd->txnid, upd->size, &las_value); + cursor, upd->txnid, upd->type, &las_value); WT_ERR(cursor->insert(cursor)); ++insert_cnt; @@ -4614,7 +4615,7 @@ record_loop: /* update_no_copy = true; /* No data copy */ repeat_count = 1; /* Single record */ - deleted = WT_UPDATE_DELETED_ISSET(upd); + deleted = upd->type == WT_UPDATE_DELETED; if (!deleted) { data = WT_UPDATE_DATA(upd); size = upd->size; @@ -4849,7 +4850,7 @@ compare: /* } } else { deleted = upd == NULL || - WT_UPDATE_DELETED_ISSET(upd); + upd->type == WT_UPDATE_DELETED; if (!deleted) { data = WT_UPDATE_DATA(upd); size = upd->size; @@ -5394,7 +5395,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session, __wt_ovfl_cache(session, page, rip, vpack)); /* If this key/value pair was deleted, we're done.
*/ - if (WT_UPDATE_DELETED_ISSET(upd)) { + if (upd->type == WT_UPDATE_DELETED) { /* * Overflow keys referencing discarded values * are no longer useful, discard the backing @@ -5604,7 +5605,7 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) for (; ins != NULL; ins = WT_SKIP_NEXT(ins)) { /* Look for an update. */ WT_RET(__rec_txn_read(session, r, ins, NULL, NULL, &upd)); - if (upd == NULL || WT_UPDATE_DELETED_ISSET(upd)) + if (upd == NULL || upd->type == WT_UPDATE_DELETED) continue; if (upd->size == 0) /* Build value cell. */ diff --git a/src/support/stat.c b/src/support/stat.c index 8711e6b9bc1..bc40244f5e6 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -97,9 +97,11 @@ static const char * const __stats_dsrc_desc[] = { "cursor: cursor-remove key bytes removed", "cursor: cursor-update value bytes updated", "cursor: insert calls", + "cursor: modify calls", "cursor: next calls", "cursor: prev calls", "cursor: remove calls", + "cursor: reserve calls", "cursor: reset calls", "cursor: restarted searches", "cursor: search calls", @@ -259,9 +261,11 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats) stats->cursor_remove_bytes = 0; stats->cursor_update_bytes = 0; stats->cursor_insert = 0; + stats->cursor_modify = 0; stats->cursor_next = 0; stats->cursor_prev = 0; stats->cursor_remove = 0; + stats->cursor_reserve = 0; stats->cursor_reset = 0; stats->cursor_restart = 0; stats->cursor_search = 0; @@ -410,9 +414,11 @@ __wt_stat_dsrc_aggregate_single( to->cursor_remove_bytes += from->cursor_remove_bytes; to->cursor_update_bytes += from->cursor_update_bytes; to->cursor_insert += from->cursor_insert; + to->cursor_modify += from->cursor_modify; to->cursor_next += from->cursor_next; to->cursor_prev += from->cursor_prev; to->cursor_remove += from->cursor_remove; + to->cursor_reserve += from->cursor_reserve; to->cursor_reset += from->cursor_reset; to->cursor_restart += from->cursor_restart; to->cursor_search += from->cursor_search; @@ 
-588,9 +594,11 @@ __wt_stat_dsrc_aggregate( to->cursor_remove_bytes += WT_STAT_READ(from, cursor_remove_bytes); to->cursor_update_bytes += WT_STAT_READ(from, cursor_update_bytes); to->cursor_insert += WT_STAT_READ(from, cursor_insert); + to->cursor_modify += WT_STAT_READ(from, cursor_modify); to->cursor_next += WT_STAT_READ(from, cursor_next); to->cursor_prev += WT_STAT_READ(from, cursor_prev); to->cursor_remove += WT_STAT_READ(from, cursor_remove); + to->cursor_reserve += WT_STAT_READ(from, cursor_reserve); to->cursor_reset += WT_STAT_READ(from, cursor_reset); to->cursor_restart += WT_STAT_READ(from, cursor_restart); to->cursor_search += WT_STAT_READ(from, cursor_search); @@ -741,9 +749,11 @@ static const char * const __stats_connection_desc[] = { "connection: total write I/Os", "cursor: cursor create calls", "cursor: cursor insert calls", + "cursor: cursor modify calls", "cursor: cursor next calls", "cursor: cursor prev calls", "cursor: cursor remove calls", + "cursor: cursor reserve calls", "cursor: cursor reset calls", "cursor: cursor restarted searches", "cursor: cursor search calls", @@ -1034,9 +1044,11 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->write_io = 0; stats->cursor_create = 0; stats->cursor_insert = 0; + stats->cursor_modify = 0; stats->cursor_next = 0; stats->cursor_prev = 0; stats->cursor_remove = 0; + stats->cursor_reserve = 0; stats->cursor_reset = 0; stats->cursor_restart = 0; stats->cursor_search = 0; @@ -1347,9 +1359,11 @@ __wt_stat_connection_aggregate( to->write_io += WT_STAT_READ(from, write_io); to->cursor_create += WT_STAT_READ(from, cursor_create); to->cursor_insert += WT_STAT_READ(from, cursor_insert); + to->cursor_modify += WT_STAT_READ(from, cursor_modify); to->cursor_next += WT_STAT_READ(from, cursor_next); to->cursor_prev += WT_STAT_READ(from, cursor_prev); to->cursor_remove += WT_STAT_READ(from, cursor_remove); + to->cursor_reserve += WT_STAT_READ(from, cursor_reserve); to->cursor_reset += 
WT_STAT_READ(from, cursor_reset); to->cursor_restart += WT_STAT_READ(from, cursor_restart); to->cursor_search += WT_STAT_READ(from, cursor_search); diff --git a/src/txn/txn.c b/src/txn/txn.c index ac4be37f855..d9edbb80564 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -600,7 +600,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) * Switch reserved operations to abort to simplify * obsolete update list truncation. */ - if (WT_UPDATE_RESERVED_ISSET(op->u.upd)) + if (op->u.upd->type == WT_UPDATE_RESERVED) op->u.upd->txnid = WT_TXN_ABORTED; break; case WT_TXN_OP_REF: diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c index fae2027e1ec..74dc679a6ef 100644 --- a/src/txn/txn_log.c +++ b/src/txn/txn_log.c @@ -82,12 +82,12 @@ __txn_op_log(WT_SESSION_IMPL *session, * or update, all of which require log records. We shouldn't ever log * reserve operations. */ - WT_ASSERT(session, !WT_UPDATE_RESERVED_ISSET(upd)); + WT_ASSERT(session, upd->type != WT_UPDATE_RESERVED); if (cbt->btree->type == BTREE_ROW) { #ifdef HAVE_DIAGNOSTIC __txn_op_log_row_key_check(session, cbt); #endif - if (WT_UPDATE_DELETED_ISSET(upd)) + if (upd->type == WT_UPDATE_DELETED) WT_RET(__wt_logop_row_remove_pack( session, logrec, op->fileid, &cursor->key)); else @@ -97,7 +97,7 @@ __txn_op_log(WT_SESSION_IMPL *session, recno = WT_INSERT_RECNO(cbt->ins); WT_ASSERT(session, recno != WT_RECNO_OOB); - if (WT_UPDATE_DELETED_ISSET(upd)) + if (upd->type == WT_UPDATE_DELETED) WT_RET(__wt_logop_col_remove_pack( session, logrec, op->fileid, recno)); else diff --git a/test/csuite/scope/main.c b/test/csuite/scope/main.c index 8b9a79decd0..83d6bd479d9 100644 --- a/test/csuite/scope/main.c +++ b/test/csuite/scope/main.c @@ -28,7 +28,7 @@ #include "test_util.h" #define KEY "key" -#define VALUE "value" +#define VALUE "value,value,value" static int ignore_errors; @@ -63,44 +63,55 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) { struct { const char *op; - enum { INSERT, SEARCH, SEARCH_NEAR, + enum { 
INSERT, MODIFY, SEARCH, SEARCH_NEAR, REMOVE, REMOVE_POS, RESERVE, UPDATE } func; const char *config; } *op, ops[] = { /* - * The ops order is fixed and shouldn't change, that is, insert - * has to happen first so search, update and remove operations - * are possible, and remove has to be last. + * The ops order is specific: insert has to happen first so + * other operations are possible, and remove has to be last. */ { "insert", INSERT, NULL, }, { "search", SEARCH, NULL, }, { "search", SEARCH_NEAR, NULL, }, { "reserve", RESERVE, NULL, }, + { "insert", MODIFY, NULL, }, { "update", UPDATE, NULL, }, { "remove", REMOVE, NULL, }, { "remove", REMOVE_POS, NULL, }, { NULL, INSERT, NULL } }; WT_CURSOR *cursor; +#define MODIFY_ENTRIES 2 + WT_MODIFY entries[MODIFY_ENTRIES]; + WT_ITEM vu; uint64_t keyr; - const char *key, *value; + const char *key, *vs; char keybuf[100], valuebuf[100]; int exact; - bool recno; + bool recno, vstring; /* Reserve requires a running transaction. */ testutil_check(session->begin_transaction(session, NULL)); cursor = NULL; for (op = ops; op->op != NULL; op++) { - key = value = NULL; + key = vs = NULL; + memset(&vu, 0, sizeof(vu)); /* Open a cursor. */ if (cursor != NULL) testutil_check(cursor->close(cursor)); testutil_check(session->open_cursor( session, uri, NULL, op->config, &cursor)); + + /* Operations change based on the key/value formats. */ recno = strcmp(cursor->key_format, "r") == 0; + vstring = strcmp(cursor->value_format, "S") == 0; + + /* Modify is only possible with "item" values. 
*/ + if (vstring && op->func == MODIFY) + continue; /* * Set up application buffers so we can detect overwrites @@ -114,7 +125,12 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) cursor->set_key(cursor, keybuf); } strcpy(valuebuf, VALUE); - cursor->set_value(cursor, valuebuf); + if (vstring) + cursor->set_value(cursor, valuebuf); + else { + vu.size = strlen(vu.data = valuebuf); + cursor->set_value(cursor, &vu); + } /* * The application must keep key and value memory valid until @@ -127,6 +143,20 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) case INSERT: testutil_check(cursor->insert(cursor)); break; + case MODIFY: + /* Modify, but don't really change anything. */ + entries[0].data.data = &VALUE[0]; + entries[0].data.size = 2; + entries[0].offset = 0; + entries[0].size = 2; + entries[1].data.data = &VALUE[3]; + entries[1].data.size = 5; + entries[1].offset = 3; + entries[1].size = 5; + + testutil_check( + cursor->modify(cursor, entries, MODIFY_ENTRIES)); + break; case SEARCH: testutil_check(cursor->search(cursor)); break; @@ -180,7 +210,12 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) else testutil_assert( cursor->get_key(cursor, &key) != 0); - testutil_assert(cursor->get_value(cursor, &value) != 0); + if (vstring) + testutil_assert( + cursor->get_value(cursor, &vs) != 0); + else + testutil_assert( + cursor->get_value(cursor, &vu) != 0); testutil_assert(ignore_errors == 0); break; case REMOVE_POS: @@ -201,16 +236,22 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) testutil_assert(strcmp(key, KEY) == 0); } ignore_errors = 1; - testutil_assert(cursor->get_value(cursor, &value) != 0); + if (vstring) + testutil_assert( + cursor->get_value(cursor, &vs) != 0); + else + testutil_assert( + cursor->get_value(cursor, &vu) != 0); testutil_assert(ignore_errors == 0); break; + case MODIFY: case RESERVE: case SEARCH: case SEARCH_NEAR: case UPDATE: /* - * Reserve, search, search-near and update position the - * cursor and have both a key 
and value. + * Modify, reserve, search, search-near and update all + * position the cursor and have both a key and value. * * Any key/value should not reference application * memory. @@ -225,9 +266,19 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) testutil_assert(key != keybuf); testutil_assert(strcmp(key, KEY) == 0); } - testutil_assert(cursor->get_value(cursor, &value) == 0); - testutil_assert(value != valuebuf); - testutil_assert(strcmp(value, VALUE) == 0); + if (vstring) { + testutil_assert( + cursor->get_value(cursor, &vs) == 0); + testutil_assert(vs != valuebuf); + testutil_assert(strcmp(vs, VALUE) == 0); + } else { + testutil_assert( + cursor->get_value(cursor, &vu) == 0); + testutil_assert(vu.data != valuebuf); + testutil_assert(vu.size == strlen(VALUE)); + testutil_assert( + memcmp(vu.data, VALUE, strlen(VALUE)) == 0); + } break; } @@ -239,9 +290,16 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) if (recno) cursor->set_key(cursor, (uint64_t)1); else { - cursor->set_key(cursor, KEY); + strcpy(keybuf, KEY); + cursor->set_key(cursor, keybuf); + } + strcpy(valuebuf, VALUE); + if (vstring) + cursor->set_value(cursor, valuebuf); + else { + vu.size = strlen(vu.data = valuebuf); + cursor->set_value(cursor, &vu); } - cursor->set_value(cursor, VALUE); testutil_check(cursor->insert(cursor)); } } @@ -272,11 +330,19 @@ main(int argc, char *argv[]) wiredtiger_open(opts->home, &event_handler, "create", &opts->conn)); run(opts->conn, "file:file.SS", "key_format=S,value_format=S"); + run(opts->conn, "file:file.Su", "key_format=S,value_format=u"); run(opts->conn, "file:file.rS", "key_format=r,value_format=S"); + run(opts->conn, "file:file.ru", "key_format=r,value_format=u"); + run(opts->conn, "lsm:lsm.SS", "key_format=S,value_format=S"); + run(opts->conn, "lsm:lsm.Su", "key_format=S,value_format=S"); run(opts->conn, "lsm:lsm.rS", "key_format=r,value_format=S"); + run(opts->conn, "lsm:lsm.ru", "key_format=r,value_format=S"); + run(opts->conn, 
"table:table.SS", "key_format=S,value_format=S"); + run(opts->conn, "table:table.Su", "key_format=S,value_format=u"); run(opts->conn, "table:table.rS", "key_format=r,value_format=S"); + run(opts->conn, "table:table.ru", "key_format=r,value_format=u"); testutil_cleanup(opts); diff --git a/test/format/config.c b/test/format/config.c index ce1dc6d6e8e..2685438af00 100644 --- a/test/format/config.c +++ b/test/format/config.c @@ -489,6 +489,8 @@ config_pct(void) #define CONFIG_DELETE_ENTRY 0 { "delete_pct", &g.c_delete_pct, 0 }, { "insert_pct", &g.c_insert_pct, 0 }, +#define CONFIG_MODIFY_ENTRY 2 + { "modify_pct", &g.c_modify_pct, 0 }, { "read_pct", &g.c_read_pct, 0 }, { "write_pct", &g.c_write_pct, 0 }, }; @@ -508,6 +510,16 @@ config_pct(void) testutil_die(EINVAL, "operation percentages total to more than 100%%"); + /* Cursor modify isn't possible for fixed-length column store. */ + if (g.type == FIX) { + if (config_is_perm("modify_pct")) + testutil_die(EINVAL, + "WT_CURSOR.modify not supported by fixed-length " + "column store or LSM"); + list[CONFIG_MODIFY_ENTRY].order = 0; + *list[CONFIG_MODIFY_ENTRY].vp = 0; + } + /* * If the delete percentage isn't nailed down, periodically set it to * 0 so salvage gets run. 
Don't do it on the first run, all our smoke @@ -547,8 +559,9 @@ config_pct(void) list[max_slot].order = 0; pct -= *list[max_slot].vp; } - testutil_assert(g.c_delete_pct + - g.c_insert_pct + g.c_read_pct + g.c_write_pct == 100); + + testutil_assert(g.c_delete_pct + g.c_insert_pct + + g.c_modify_pct + g.c_read_pct + g.c_write_pct == 100); } /* diff --git a/test/format/config.h b/test/format/config.h index bc809a764ce..3a41411e104 100644 --- a/test/format/config.h +++ b/test/format/config.h @@ -238,6 +238,10 @@ static CONFIG c[] = { "configure for mmap operations", /* 90% */ C_BOOL, 90, 0, 0, &g.c_mmap, NULL }, + { "modify_pct", + "percent operations that are value modifications", + C_IGNORE, 0, 0, 100, &g.c_modify_pct, NULL }, + { "ops", "the number of modification operations done per run", 0x0, 0, M(2), M(100), &g.c_ops, NULL }, @@ -323,7 +327,7 @@ static CONFIG c[] = { C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_config_open }, { "write_pct", - "percent operations that are writes", + "percent operations that are value updates", C_IGNORE, 0, 0, 100, &g.c_write_pct, NULL }, { NULL, NULL, 0x0, 0, 0, 0, NULL, NULL } diff --git a/test/format/format.h b/test/format/format.h index fa898e439be..104ee1553f4 100644 --- a/test/format/format.h +++ b/test/format/format.h @@ -78,6 +78,8 @@ #define FORMAT_OPERATION_REPS 3 /* 3 thread operations sets */ +#define MAX_MODIFY_ENTRIES 5 /* maximum change vectors */ + typedef struct { char *home; /* Home directory */ char *home_backup; /* Hot-backup directory */ @@ -147,28 +149,28 @@ typedef struct { uint32_t c_bloom_hash_count; uint32_t c_bloom_oldest; uint32_t c_cache; - uint32_t c_compact; uint32_t c_checkpoints; - char *c_checksum; + char *c_checksum; uint32_t c_chunk_size; - char *c_compression; - char *c_encryption; - char *c_config_open; + uint32_t c_compact; + char *c_compression; + char *c_config_open; uint32_t c_data_extend; - char *c_data_source; + char *c_data_source; uint32_t c_delete_pct; uint32_t c_dictionary; uint32_t 
c_direct_io; + char *c_encryption; uint32_t c_evict_max; + char *c_file_type; uint32_t c_firstfit; - char *c_file_type; uint32_t c_huffman_key; uint32_t c_huffman_value; uint32_t c_in_memory; uint32_t c_insert_pct; uint32_t c_internal_key_truncation; uint32_t c_intl_page_max; - char *c_isolation; + char *c_isolation; uint32_t c_key_gap; uint32_t c_key_max; uint32_t c_key_min; @@ -176,22 +178,23 @@ typedef struct { uint32_t c_leak_memory; uint32_t c_logging; uint32_t c_logging_archive; - char *c_logging_compression; + char *c_logging_compression; uint32_t c_logging_prealloc; uint32_t c_long_running_txn; uint32_t c_lsm_worker_threads; uint32_t c_merge_max; uint32_t c_mmap; + uint32_t c_modify_pct; uint32_t c_ops; - uint32_t c_quiet; uint32_t c_prefix_compression; uint32_t c_prefix_compression_min; + uint32_t c_quiet; + uint32_t c_read_pct; + uint32_t c_rebalance; uint32_t c_repeat_data_pct; uint32_t c_reverse; uint32_t c_rows; uint32_t c_runs; - uint32_t c_read_pct; - uint32_t c_rebalance; uint32_t c_salvage; uint32_t c_split_pct; uint32_t c_statistics; diff --git a/test/format/ops.c b/test/format/ops.c index 6e3e3b783c5..02cce77eec2 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -29,6 +29,8 @@ #include "format.h" static int col_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); +static int col_modify( + TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool); static int col_reserve(WT_CURSOR *, uint64_t, bool); static int col_update( @@ -37,6 +39,8 @@ static int nextprev(WT_CURSOR *, int); static void *ops(void *); static int row_insert( TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); +static int row_modify( + TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool); static int row_reserve(WT_CURSOR *, WT_ITEM *, uint64_t, bool); static int row_update( @@ -403,7 +407,7 @@ snap_check(WT_CURSOR 
*cursor, static void * ops(void *arg) { - enum { INSERT, READ, REMOVE, UPDATE } op; + enum { INSERT, MODIFY, READ, REMOVE, UPDATE } op; SNAP_OPS *snap, snap_list[64]; TINFO *tinfo; WT_CONNECTION *conn; @@ -610,11 +614,12 @@ skip_checkpoint: /* Pick the next checkpoint operation. */ op = REMOVE; else if (i < g.c_delete_pct + g.c_insert_pct) op = INSERT; - else if (i < - g.c_delete_pct + g.c_insert_pct + g.c_write_pct) + else if (i < g.c_delete_pct + + g.c_insert_pct + g.c_modify_pct) + op = MODIFY; + else if (i < g.c_delete_pct + + g.c_insert_pct + g.c_modify_pct + g.c_write_pct) op = UPDATE; - else - op = READ; } /* @@ -698,6 +703,30 @@ skip_checkpoint: /* Pick the next checkpoint operation. */ testutil_assert(ret == 0 || ret == WT_ROLLBACK); } break; + case MODIFY: + ++tinfo->update; + switch (g.type) { + case ROW: + ret = row_modify(tinfo, cursor, + key, value, keyno, positioned); + break; + case VAR: + ret = col_modify(tinfo, cursor, + key, value, keyno, positioned); + break; + } + if (ret == 0) { + positioned = true; + if (SNAP_TRACK) + snap_track(snap++, keyno, NULL, value); + } else { + positioned = false; + if (ret == WT_ROLLBACK && intxn) + goto deadlock; + testutil_assert(ret == 0 || + ret == WT_NOTFOUND || ret == WT_ROLLBACK); + } + break; case READ: ++tinfo->search; ret = read_row(cursor, key, value, keyno); @@ -742,17 +771,15 @@ skip_checkpoint: /* Pick the next checkpoint operation. */ case UPDATE: update_instead_of_insert: ++tinfo->update; - - /* Update the row. 
*/ switch (g.type) { case ROW: - ret = row_update(tinfo, - cursor, key, value, keyno, positioned); + ret = row_update(tinfo, cursor, + key, value, keyno, positioned); break; case FIX: case VAR: - ret = col_update(tinfo, - cursor, key, value, keyno, positioned); + ret = col_update(tinfo, cursor, + key, value, keyno, positioned); break; } if (ret == 0) { @@ -1168,6 +1195,235 @@ col_reserve(WT_CURSOR *cursor, uint64_t keyno, bool positioned) return (0); } +/* + * modify_build -- + * Generate a set of modify vectors, and copy what the final result + * should be into the value buffer. + */ +static bool +modify_build(TINFO *tinfo, + WT_CURSOR *cursor, WT_MODIFY *entries, int *nentriesp, WT_ITEM *value) +{ + static char repl[64]; + size_t len, size; + u_int i, nentries; + WT_ITEM *ta, _ta, *tb, _tb, *tmp; + + if (repl[0] == '\0') + memset(repl, '+', sizeof(repl)); + + ta = &_ta; + memset(ta, 0, sizeof(*ta)); + tb = &_tb; + memset(tb, 0, sizeof(*tb)); + + testutil_check(cursor->get_value(cursor, value)); + + /* + * Randomly select a number of byte changes, offsets and lengths. Start + * at least 11 bytes in so we skip the leading key information. + */ + nentries = mmrand(&tinfo->rnd, 1, MAX_MODIFY_ENTRIES); + for (i = 0; i < nentries; ++i) { + entries[i].data.data = repl; + entries[i].data.size = (size_t)mmrand(&tinfo->rnd, 0, 10); + entries[i].offset = (size_t)mmrand(&tinfo->rnd, 20, 40); + entries[i].size = (size_t)mmrand(&tinfo->rnd, 0, 10); + } + + /* + * Process the entries to figure out how large a buffer we need. This is + * a bit pessimistic because we're ignoring replacement bytes, but it's + * a simpler calculation. + */ + for (size = cursor->value.size, i = 0; i < nentries; ++i) { + if (entries[i].offset >= size) + size = entries[i].offset; + size += entries[i].data.size; + } + + /* If size is larger than the available buffer size, skip this one. */ + if (size >= value->memsize) + return (false); + + /* Allocate a pair of buffers. 
*/ + ta->mem = dcalloc(size, sizeof(uint8_t)); + tb->mem = dcalloc(size, sizeof(uint8_t)); + + /* + * Use a brute-force process to create the value WiredTiger will create + * from this change vector. Don't do anything tricky to speed it up, we + * want to use a different algorithm from WiredTiger's, the idea is to + * bug-check the library. + */ + memcpy(ta->mem, value->data, value->size); + ta->size = value->size; + for (i = 0; i < nentries; ++i) { + /* Take leading bytes from the original, plus any gap bytes. */ + if (entries[i].offset >= ta->size) { + memcpy(tb->mem, ta->mem, ta->size); + if (entries[i].offset > ta->size) + memset((uint8_t *)tb->mem + ta->size, + '\0', entries[i].offset - ta->size); + } else + if (entries[i].offset > 0) + memcpy(tb->mem, ta->mem, entries[i].offset); + tb->size = entries[i].offset; + + /* Take replacement bytes. */ + if (entries[i].data.size > 0) { + memcpy((uint8_t *)tb->mem + tb->size, + entries[i].data.data, entries[i].data.size); + tb->size += entries[i].data.size; + } + + /* Take trailing bytes from the original. */ + len = entries[i].offset + entries[i].size; + if (ta->size > len) { + memcpy((uint8_t *)tb->mem + tb->size, + (uint8_t *)ta->mem + len, ta->size - len); + tb->size += ta->size - len; + } + testutil_assert(tb->size <= size); + + tmp = ta; + ta = tb; + tb = tmp; + } + + /* Copy the expected result into the value structure. */ + memcpy(value->mem, ta->mem, ta->size); + value->data = value->mem; + value->size = ta->size; + + free(ta->mem); + free(tb->mem); + + *nentriesp = (int)nentries; + return (true); +} + +/* + * row_modify -- + * Modify a row in a row-store file. 
+ */ +static int +row_modify(TINFO *tinfo, WT_CURSOR *cursor, + WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) +{ + WT_DECL_RET; + WT_MODIFY entries[MAX_MODIFY_ENTRIES]; + int nentries; + + if (!positioned) { + key_gen(key, keyno); + cursor->set_key(cursor, key); + switch (ret = cursor->search(cursor)) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: + return (WT_ROLLBACK); + case WT_NOTFOUND: + return (WT_NOTFOUND); + default: + testutil_die(ret, + "row_modify: read row %" PRIu64 " by key", keyno); + } + } + + /* + * Generate a set of change vectors and copy the expected result into + * the value buffer. If the return value is non-zero, there wasn't a + * big enough value to work with, or for some reason we couldn't build + * a reasonable change vector. + */ + ret = WT_NOTFOUND; + if (modify_build(tinfo, cursor, entries, &nentries, value)) + ret = cursor->modify(cursor, entries, nentries); + switch (ret) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: + return (WT_ROLLBACK); + case WT_NOTFOUND: + return (WT_NOTFOUND); + default: + testutil_die(ret, + "row_modify: modify row %" PRIu64 " by key", keyno); + } + +#ifdef HAVE_BERKELEY_DB + if (!SINGLETHREADED) + return (0); + + bdb_update(key->data, key->size, value->data, value->size); +#endif + return (0); +} + +/* + * col_modify -- + * Modify a row in a column-store file. + */ +static int +col_modify(TINFO *tinfo, WT_CURSOR *cursor, + WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) +{ + WT_DECL_RET; + WT_MODIFY entries[MAX_MODIFY_ENTRIES]; + int nentries; + + if (!positioned) { + cursor->set_key(cursor, keyno); + switch (ret = cursor->search(cursor)) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: + return (WT_ROLLBACK); + case WT_NOTFOUND: + return (WT_NOTFOUND); + default: + testutil_die(ret, + "col_modify: read row %" PRIu64, keyno); + } + } + + /* + * Generate a set of change vectors and copy the expected result into + * the value buffer. 
If the return value is non-zero, there wasn't a + * big enough value to work with, or for some reason we couldn't build + * a reasonable change vector. + */ + ret = WT_NOTFOUND; + if (modify_build(tinfo, cursor, entries, &nentries, value)) + ret = cursor->modify(cursor, entries, nentries); + switch (ret) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: + return (WT_ROLLBACK); + case WT_NOTFOUND: + return (WT_NOTFOUND); + default: + testutil_die(ret, "col_modify: modify row %" PRIu64, keyno); + } + +#ifdef HAVE_BERKELEY_DB + if (!SINGLETHREADED) + return (0); + + key_gen(key, keyno); + bdb_update(key->data, key->size, value->data, value->size); +#else + (void)key; /* [-Wunused-variable] */ +#endif + return (0); +} + /* * row_update -- * Update a row in a row-store file. diff --git a/test/suite/test_cursor12.py b/test/suite/test_cursor12.py new file mode 100644 index 00000000000..827f37cfcef --- /dev/null +++ b/test/suite/test_cursor12.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest +from wtscenario import make_scenarios + +# test_cursor12.py +# Test cursor modify call +class test_cursor12(wttest.WiredTigerTestCase): + types = [ + ('file', dict(uri='file:modify')), + ('lsm', dict(uri='lsm:modify')), + ('table', dict(uri='table:modify')), + ] + scenarios = make_scenarios(types) + + # Smoke-test the modify API. + def test_modify_smoke(self): + # List with original value, final value, and modifications to get + # there. + list = [ + { + 'o' : 'ABCDEFGH', # no operation + 'f' : 'ABCDEFGH', + 'mods' : [['', 0, 0]] + },{ + 'o' : 'ABCDEFGH', # no operation with offset + 'f' : 'ABCDEFGH', + 'mods' : [['', 4, 0]] + },{ + 'o' : 'ABCDEFGH', # rewrite beginning + 'f' : '--CDEFGH', + 'mods' : [['--', 0, 2]] + },{ + 'o' : 'ABCDEFGH', # rewrite end + 'f' : 'ABCDEF--', + 'mods' : [['--', 6, 2]] + },{ + 'o' : 'ABCDEFGH', # append + 'f' : 'ABCDEFGH--', + 'mods' : [['--', 8, 2]] + },{ + 'o' : 'ABCDEFGH', # append with gap + 'f' : 'ABCDEFGH\00\00--', + 'mods' : [['--', 10, 2]] + },{ + 'o' : 'ABCDEFGH', # multiple replacements + 'f' : 'A-C-E-G-', + 'mods' : [['-', 1, 1], ['-', 3, 1], ['-', 5, 1], ['-', 7, 1]] + },{ + 'o' : 'ABCDEFGH', # multiple overlapping replacements + 'f' : 'A-CDEFGH', + 'mods' : [['+', 1, 1], ['+', 1, 1], ['+', 1, 1], ['-', 1, 1]] + },{ + 'o' : 'ABCDEFGH', # multiple overlapping gap replacements + 'f' : 'ABCDEFGH\00\00--', + 'mods' : [['+', 10, 1], ['+', 10, 1], ['+', 10, 1], ['--', 10, 2]] + },{ + 'o' : 'ABCDEFGH', # shrink beginning + 'f' : '--EFGH', + 
'mods' : [['--', 0, 4]] + },{ + 'o' : 'ABCDEFGH', # shrink middle + 'f' : 'AB--GH', + 'mods' : [['--', 2, 4]] + },{ + 'o' : 'ABCDEFGH', # shrink end + 'f' : 'ABCD--', + 'mods' : [['--', 4, 4]] + },{ + 'o' : 'ABCDEFGH', # grow beginning + 'f' : '--ABCDEFGH', + 'mods' : [['--', 0, 0]] + },{ + 'o' : 'ABCDEFGH', # grow middle + 'f' : 'ABCD--EFGH', + 'mods' : [['--', 4, 0]] + },{ + 'o' : 'ABCDEFGH', # grow end + 'f' : 'ABCDEFGH--', + 'mods' : [['--', 8, 0]] + },{ + 'o' : 'ABCDEFGH', # discard beginning + 'f' : 'EFGH', + 'mods' : [['', 0, 4]] + },{ + 'o' : 'ABCDEFGH', # discard middle + 'f' : 'ABGH', + 'mods' : [['', 2, 4]] + },{ + 'o' : 'ABCDEFGH', # discard end + 'f' : 'ABCD', + 'mods' : [['', 4, 4]] + },{ + 'o' : 'ABCDEFGH', # overlap the end and append + 'f' : 'ABCDEF--XX', + 'mods' : [['--XX', 6, 2]] + },{ + 'o' : 'ABCDEFGH', # overlap the end with incorrect size + 'f' : 'ABCDEFG01234567', + 'mods' : [['01234567', 7, 2000]] + } + ] + + self.session.create(self.uri, 'key_format=S,value_format=u') + cursor = self.session.open_cursor(self.uri, None, None) + + # For each test in the list, set the original value, apply modifications + # in order, then confirm the final state. + for i in list: + cursor['ABC'] = i['o'] + + mods = [] + for j in i['mods']: + mod = wiredtiger.Modify(j[0], j[1], j[2]) + mods.append(mod) + + cursor.set_key('ABC') + cursor.modify(mods) + self.assertEquals(str(cursor['ABC']), i['f']) + + # Check that modify returns not-found after a delete. 
+ def test_modify_delete(self): + self.session.create(self.uri, 'key_format=S,value_format=u') + cursor = self.session.open_cursor(self.uri, None, None) + cursor['ABC'] = 'ABCDEFGH' + cursor.set_key('ABC') + cursor.remove() + + mods = [] + mod = wiredtiger.Modify('ABCD', 3, 3) + mods.append(mod) + + cursor.set_key('ABC') + #self.assertEqual(cursor.modify(mods), wiredtiger.WT_NOTFOUND) + self.assertRaises( + wiredtiger.WiredTigerError, lambda:cursor.modify(mods)) + +if __name__ == '__main__': + wttest.run() -- cgit v1.2.1 From 92dde8bce6cf16c079baf47d4abd35a4e23195c4 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Wed, 24 May 2017 15:24:19 -0400 Subject: Revert "WT-2972 Add interface allowing partial updates to existing values (#3406)" (#3434) This reverts commit a3073be71b1406c7eef60e35fcf2cdc1174e752b. --- dist/s_define.list | 2 +- dist/s_string.ok | 3 - dist/stat_data.py | 6 +- examples/c/ex_all.c | 33 -- .../java/com/wiredtiger/examples/ex_cursor.java | 41 --- lang/java/Makefile.am | 2 - lang/java/java_doc.i | 1 - lang/java/wiredtiger.i | 233 ------------ lang/python/wiredtiger.i | 119 +----- src/btree/bt_curnext.c | 8 +- src/btree/bt_curprev.c | 8 +- src/btree/bt_cursor.c | 87 ++--- src/btree/bt_debug.c | 4 +- src/btree/bt_delete.c | 2 +- src/btree/bt_read.c | 16 +- src/btree/bt_split.c | 6 +- src/btree/bt_stat.c | 20 +- src/btree/col_modify.c | 17 +- src/btree/row_modify.c | 20 +- src/cursor/cur_backup.c | 1 - src/cursor/cur_config.c | 1 - src/cursor/cur_ds.c | 1 - src/cursor/cur_dump.c | 1 - src/cursor/cur_file.c | 6 - src/cursor/cur_index.c | 1 - src/cursor/cur_join.c | 2 - src/cursor/cur_log.c | 1 - src/cursor/cur_metadata.c | 1 - src/cursor/cur_stat.c | 1 - src/cursor/cur_std.c | 115 ------ src/cursor/cur_table.c | 2 - src/docs/Doxyfile | 1 - src/include/btmem.h | 37 +- src/include/cursor.h | 2 - src/include/extern.h | 7 +- src/include/log.h | 2 +- src/include/lsm.h | 4 +- src/include/mutex.h | 4 +- src/include/mutex.i | 8 +- src/include/schema.h | 2 +- 
src/include/stat.h | 4 - src/include/txn.i | 2 +- src/include/verify_build.h | 1 - src/include/wiredtiger.in | 406 +++++++++------------ src/lsm/lsm_cursor.c | 1 - src/reconcile/rec_write.c | 23 +- src/support/stat.c | 14 - src/txn/txn.c | 2 +- src/txn/txn_log.c | 6 +- test/csuite/scope/main.c | 102 +----- test/format/config.c | 17 +- test/format/config.h | 6 +- test/format/format.h | 27 +- test/format/ops.c | 278 +------------- test/suite/test_cursor12.py | 165 --------- 55 files changed, 355 insertions(+), 1527 deletions(-) delete mode 100644 test/suite/test_cursor12.py diff --git a/dist/s_define.list b/dist/s_define.list index 9f94132f584..b7f124ef18c 100644 --- a/dist/s_define.list +++ b/dist/s_define.list @@ -58,7 +58,7 @@ WT_STAT_INCRV_BASE WT_STAT_WRITE WT_TIMEDIFF_US WT_TRET_ERROR_OK -WT_UPDATE_SIZE +WT_UPDATE_RESERVED_VALUE WT_WITH_LOCK_NOWAIT WT_WITH_LOCK_WAIT __F diff --git a/dist/s_string.ok b/dist/s_string.ok index d5a562fcbd1..ac21c61a8ef 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -344,7 +344,6 @@ Split's Stoica StoreLoad StoreStore -Su Syscall TAILQ TCMalloc @@ -950,7 +949,6 @@ nd needkey needvalue negint -nentries newbar newfile newuri @@ -1094,7 +1092,6 @@ rotN rotn rp rpc -ru run's runtime rwlock diff --git a/dist/stat_data.py b/dist/stat_data.py index 203a88fb055..4a147ca44eb 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -231,8 +231,8 @@ connection_stats = [ CacheStat('cache_eviction_walks_abandoned', 'eviction walks abandoned'), CacheStat('cache_eviction_walks_active', 'files with active eviction walks', 'no_clear,no_scale'), CacheStat('cache_eviction_walks_started', 'files with new eviction walks started'), - CacheStat('cache_eviction_worker_created', 'eviction worker thread created'), CacheStat('cache_eviction_worker_evicting', 'eviction worker thread evicting pages'), + CacheStat('cache_eviction_worker_created', 'eviction worker thread created'), CacheStat('cache_eviction_worker_removed', 'eviction worker thread 
removed'), CacheStat('cache_hazard_checks', 'hazard pointer check calls'), CacheStat('cache_hazard_max', 'hazard pointer maximum array length', 'max_aggregate,no_scale'), @@ -262,11 +262,9 @@ connection_stats = [ ########################################## CursorStat('cursor_create', 'cursor create calls'), CursorStat('cursor_insert', 'cursor insert calls'), - CursorStat('cursor_modify', 'cursor modify calls'), CursorStat('cursor_next', 'cursor next calls'), CursorStat('cursor_prev', 'cursor prev calls'), CursorStat('cursor_remove', 'cursor remove calls'), - CursorStat('cursor_reserve', 'cursor reserve calls'), CursorStat('cursor_reset', 'cursor reset calls'), CursorStat('cursor_restart', 'cursor restarted searches'), CursorStat('cursor_search', 'cursor search calls'), @@ -555,12 +553,10 @@ dsrc_stats = [ CursorStat('cursor_insert', 'insert calls'), CursorStat('cursor_insert_bulk', 'bulk-loaded cursor-insert calls'), CursorStat('cursor_insert_bytes', 'cursor-insert key and value bytes inserted', 'size'), - CursorStat('cursor_modify', 'modify calls'), CursorStat('cursor_next', 'next calls'), CursorStat('cursor_prev', 'prev calls'), CursorStat('cursor_remove', 'remove calls'), CursorStat('cursor_remove_bytes', 'cursor-remove key bytes removed', 'size'), - CursorStat('cursor_reserve', 'reserve calls'), CursorStat('cursor_reset', 'reset calls'), CursorStat('cursor_restart', 'restarted searches'), CursorStat('cursor_search', 'search calls'), diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c index 5e1fa4bbcc5..f94863584e8 100644 --- a/examples/c/ex_all.c +++ b/examples/c/ex_all.c @@ -308,39 +308,6 @@ cursor_ops(WT_SESSION *session) /*! [Reserve a record] */ } - { - /*! [Modify an existing record] */ - WT_MODIFY entries[3]; - const char *key = "some key"; - ret = session->open_cursor( - session, "table:mytable", NULL, NULL, &cursor); - - /* Position the cursor. 
*/ - cursor->set_key(cursor, key); - ret = cursor->search(cursor); - - /* Replace 20 bytes starting at byte offset 5. */ - entries[0].data.data = "some data"; - entries[0].data.size = strlen(entries[0].data.data); - entries[0].offset = 5; - entries[0].size = 20; - - /* Insert data at byte offset 40. */ - entries[1].data.data = "and more data"; - entries[1].data.size = strlen(entries[1].data.data); - entries[1].offset = 40; - entries[1].size = 0; - - /* Replace 2 bytes starting at byte offset 10. */ - entries[2].data.data = "and more data"; - entries[2].data.size = strlen(entries[2].data.data); - entries[2].offset = 10; - entries[2].size = 2; - - ret = cursor->modify(cursor, entries, 3); - /*! [Modify an existing record] */ - } - { /*! [Update an existing record or insert a new record] */ const char *key = "some key", *value = "some value"; diff --git a/examples/java/com/wiredtiger/examples/ex_cursor.java b/examples/java/com/wiredtiger/examples/ex_cursor.java index 4a57f3c35da..498ace12865 100644 --- a/examples/java/com/wiredtiger/examples/ex_cursor.java +++ b/examples/java/com/wiredtiger/examples/ex_cursor.java @@ -156,41 +156,6 @@ public class ex_cursor { } /*! [cursor remove] */ - /*! 
[cursor modify] */ - public static int - cursor_modify(Cursor cursor) - throws WiredTigerException - { - byte orig[] = new byte[4]; - for (int i = 0; i < 4; i++) - orig[i] = (byte)i; - cursor.putKeyString("key"); - cursor.putValueByteArray(orig); - cursor.insert(); // 0x0 0x1 0x2 0x3 - - byte b10[] = new byte[4]; - for (int i = 0; i < 4; i++) - b10[i] = (byte)(0x10 + i); - byte b20[] = new byte[4]; - for (int i = 0; i < 4; i++) - b20[i] = (byte)(0x20 + i); - - Modify modlist[] = new Modify[2]; - // The following Modify replaces one byte at position one by: - // (0x10 0x11 0x12 0x13), leaving: - // 0x0 0x10 0x11 0x12 0x13 0x2 0x3 - modlist[0] = new Modify(b10, 1, 1); - - // The following Modify replaces one byte at position three by: - // (0x20 0x21 0x22 0x23), leaving: - // 0x0 0x10 0x11 0x20 0x21 0x22 0x23 0x13 0x2 0x3 - modlist[1] = new Modify(b20, 3, 1); - - cursor.putKeyString("key"); - return (cursor.modify(modlist)); - } - /*! [cursor modify] */ - public static int cursorExample() throws WiredTigerException @@ -254,12 +219,6 @@ public class ex_cursor { ret = cursor_remove(cursor); ret = cursor.close(); - /* Create a table with a raw value to illustrate certain operations. */ - ret = session.create("table:raw", "key_format=S,value_format=u"); - cursor = session.open_cursor("table:raw", null, null); - ret = cursor_modify(cursor); - ret = cursor.close(); - /* Note: closing the connection implicitly closes open session(s). 
*/ if ((ret = conn.close(null)) != 0) System.err.println("Error connecting to " + home + ": " + diff --git a/lang/java/Makefile.am b/lang/java/Makefile.am index 71515c430fd..2ff822a5d08 100644 --- a/lang/java/Makefile.am +++ b/lang/java/Makefile.am @@ -18,7 +18,6 @@ JAVA_SRC = \ $(JAVADESTFULL)/AsyncOpType.java \ $(JAVADESTFULL)/Connection.java \ $(JAVADESTFULL)/Cursor.java \ - $(JAVADESTFULL)/Modify.java \ $(JAVADESTFULL)/SearchStatus.java \ $(JAVADESTFULL)/PackFormatInputStream.java \ $(JAVADESTFULL)/PackInputStream.java \ @@ -32,7 +31,6 @@ JAVA_SRC = \ $(JAVADESTFULL)/wiredtiger.java \ $(JAVADESTFULL)/wiredtigerConstants.java \ $(JAVADESTFULL)/wiredtigerJNI.java \ - $(JAVADESTFULL)/WT_MODIFY_LIST.java \ $(JAVAEXAMPLES)/ex_access.java \ $(JAVAEXAMPLES)/ex_all.java \ $(JAVAEXAMPLES)/ex_async.java \ diff --git a/lang/java/java_doc.i b/lang/java/java_doc.i index f9e017ee43a..8088abbf065 100644 --- a/lang/java/java_doc.i +++ b/lang/java/java_doc.i @@ -12,7 +12,6 @@ COPYDOC(__wt_cursor, WT_CURSOR, reset) COPYDOC(__wt_cursor, WT_CURSOR, search) COPYDOC(__wt_cursor, WT_CURSOR, search_near) COPYDOC(__wt_cursor, WT_CURSOR, insert) -COPYDOC(__wt_cursor, WT_CURSOR, modify) COPYDOC(__wt_cursor, WT_CURSOR, update) COPYDOC(__wt_cursor, WT_CURSOR, remove) COPYDOC(__wt_cursor, WT_CURSOR, reserve) diff --git a/lang/java/wiredtiger.i b/lang/java/wiredtiger.i index 4c22a0af43b..d6fc5fc8b9a 100644 --- a/lang/java/wiredtiger.i +++ b/lang/java/wiredtiger.i @@ -47,7 +47,6 @@ %} %{ -#include "wiredtiger.h" #include "src/include/wt_internal.h" /* @@ -109,23 +108,6 @@ static void throwWiredTigerException(JNIEnv *jenv, int err) { (*jenv)->ThrowNew(jenv, excep, wiredtiger_strerror(err)); } -struct __wt_java_modify_impl; -struct __wt_java_modify_list; -typedef struct __wt_java_modify_impl WT_MODIFY_IMPL; -typedef struct __wt_java_modify_list WT_MODIFY_LIST; -static void modify_impl_release(WT_MODIFY_IMPL *impl); -static void modify_list_release(WT_MODIFY_LIST *impl); - -/* - * An extension 
to the WT_MODIFY struct, so we can associate some Java-specific - * information with it. - */ -typedef struct __wt_java_modify_impl { - WT_MODIFY modify; - JNIEnv *jnienv; - jobject ref; -} WT_MODIFY_IMPL; - %} /* No finalizers */ @@ -177,32 +159,6 @@ typedef struct __wt_java_modify_impl { } %} -/* - * In some cases, for an internal interface, we need something like a WT_ITEM, - * but we need to hold onto the memory past the method call, and release it - * later. A WT_ITEM_HOLD serves the purpose, it retains the java object - * for the byte array that we make into a global reference. - */ -%typemap(jni) WT_ITEM_HOLD, WT_ITEM_HOLD * "jbyteArray" -%typemap(jtype) WT_ITEM_HOLD, WT_ITEM_HOLD * "byte[]" -%typemap(jstype) WT_ITEM_HOLD, WT_ITEM_HOLD * "byte[]" - -%typemap(javain) WT_ITEM_HOLD, WT_ITEM_HOLD * "$javainput" -%typemap(javaout) WT_ITEM_HOLD, WT_ITEM_HOLD * { - return ($jnicall); -} -%typemap(in) WT_ITEM_HOLD * (WT_ITEM_HOLD item) %{ - $1 = &item; - $1->data = (*jenv)->GetByteArrayElements(jenv, $input, 0); - $1->size = (size_t)(*jenv)->GetArrayLength(jenv, $input); - $1->jnienv = jenv; - $1->ref = (*jenv)->NewGlobalRef(jenv, $input); -%} - -%typemap(argout) WT_ITEM_HOLD * %{ - /* Explicitly don't release the byte array elements here. */ -%} - /* Don't require empty config strings. 
*/ %typemap(default) const char *config %{ $1 = NULL; %} @@ -353,10 +309,6 @@ WT_CLASS(struct __wt_async_op, WT_ASYNC_OP, op) %rename (prev_wrap) __wt_cursor::prev; %javamethodmodifiers __wt_cursor::key_format "protected"; %javamethodmodifiers __wt_cursor::value_format "protected"; -%ignore __wt_modify::data; -%ignore __wt_modify::position; -%ignore __wt_modify::size; -%ignore __wt_cursor::modify; %ignore __wt_cursor::compare(WT_CURSOR *, WT_CURSOR *, int *); %rename (compare_wrap) __wt_cursor::compare; @@ -1272,47 +1224,6 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; JCALL1(DeleteLocalRef, jcb->jnienv, jcursor); return (0); } - - int modify_wrap(WT_MODIFY_LIST *list, WT_ITEM *k) { - int ret; - - $self->set_key($self, k); - ret = $self->modify(self, list->mod_array, list->count); - modify_list_release(list); - return (ret); - } - - /* - * Called internally after a new call. The artificial constructor for - * WT_MODIFY_LIST has no opportunity to throw an exception on a memory - * allocation failure, so the the null check must be made within a - * method on WT_CURSOR. - */ - bool _new_check_modify_list(WT_MODIFY_LIST *list) { - JAVA_CALLBACK *jcb; - if (list == NULL) { - jcb = (JAVA_CALLBACK *)$self->lang_private; - throwWiredTigerException(jcb->jnienv, ENOMEM); - return (false); - } - return (true); - } - - /* - * Called internally after a new call. The artificial constructor for - * WT_MODIFY has no opportunity to throw an exception on a memory - * allocation failure, so the the null check must be made within a - * method on WT_CURSOR. 
- */ - bool _new_check_modify(WT_MODIFY *mod) { - JAVA_CALLBACK *jcb; - if (mod == NULL) { - jcb = (JAVA_CALLBACK *)$self->lang_private; - throwWiredTigerException(jcb->jnienv, ENOMEM); - return (false); - } - return (true); - } } /* Cache key/value formats in Cursor */ @@ -1909,149 +1820,6 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; return new PackInputStream(valueFormat, get_value_wrap(), _java_raw()); } - - /** - * Modify an existing record. - * - * The cursor must already be positioned, and the key's value will be - * updated. - * - * \param mods an array of modifications. - * \return 0 on success, errno on error. - */ - public int modify(Modify mods[]) - throws WiredTigerException { - byte[] key = keyPacker.getValue(); - keyPacker.reset(); - - WT_MODIFY_LIST l = new WT_MODIFY_LIST(mods.length); - if (!_new_check_modify_list(l)) - return (0); // exception is already thrown - int pos = 0; - - for (Modify m : mods) { - if (!_new_check_modify(m)) - return (0); // exception is already thrown - l.set(pos, m); - pos++; - } - return modify_wrap(l, key); - } -%} - -/* - * Support for WT_CURSOR.modify. - */ - -%inline %{ -typedef struct __wt_java_item_hold { -#ifndef SWIG - void *data; - size_t size; - JNIEnv *jnienv; - jobject ref; -#endif -} WT_ITEM_HOLD; - -/* - * An internal Java class encapsulates a list of Modify objects (stored as a - * WT_MODIFY array in C). 
- */ -typedef struct __wt_java_modify_list { -#ifndef SWIG - WT_MODIFY *mod_array; - jobject *ref_array; - JNIEnv *jnienv; - int count; -#endif -} WT_MODIFY_LIST; -%} -%extend __wt_java_modify_list { - __wt_java_modify_list(int count) { - WT_MODIFY_LIST *self; - if (__wt_calloc_def(NULL, 1, &self) != 0) - return (NULL); - if (__wt_calloc_def(NULL, (size_t)count, - &self->mod_array) != 0) { - __wt_free(NULL, self); - return (NULL); - } - if (__wt_calloc_def(NULL, (size_t)count, - &self->ref_array) != 0) { - __wt_free(NULL, self->mod_array); - __wt_free(NULL, self); - return (NULL); - } - self->count = count; - return (self); - } - ~__wt_java_modify_list() { - modify_list_release(self); - __wt_free(NULL, self); - } - void set(int i, WT_MODIFY *m) { - WT_MODIFY_IMPL *impl = (WT_MODIFY_IMPL *)m; - self->mod_array[i] = *m; - self->ref_array[i] = impl->ref; - impl->ref = (jobject)0; - self->jnienv = impl->jnienv; - } -}; - -%extend __wt_modify { - __wt_modify() { - WT_MODIFY_IMPL *self; - if (__wt_calloc_def(NULL, 1, &self) != 0) - return (NULL); - self->modify.data.data = NULL; - self->modify.data.size = 0; - self->modify.offset = 0; - self->modify.size = 0; - return (&self->modify); - } - __wt_modify(WT_ITEM_HOLD *itemdata, - size_t offset, size_t size) { - WT_MODIFY_IMPL *self; - if (__wt_calloc_def(NULL, 1, &self) != 0) - return (NULL); - self->modify.data.data = itemdata->data; - self->modify.data.size = itemdata->size; - self->modify.offset = offset; - self->modify.size = size; - self->ref = itemdata->ref; - self->jnienv = itemdata->jnienv; - return (&self->modify); - } - ~__wt_modify() { - modify_impl_release((WT_MODIFY_IMPL *)self); - __wt_free(NULL, self); - } -}; - -%{ -static void modify_list_release(WT_MODIFY_LIST *list) { - for (int i = 0; i < list->count; i++) - if (list->ref_array[i] != (jobject)0) { - (*list->jnienv)->ReleaseByteArrayElements( - list->jnienv, list->ref_array[i], - (jbyte *)list->mod_array[i].data.data, 0); - 
(*list->jnienv)->DeleteGlobalRef( - list->jnienv, list->ref_array[i]); - } - __wt_free(NULL, list->ref_array); - __wt_free(NULL, list->mod_array); - list->count = 0; -} - -static void modify_impl_release(WT_MODIFY_IMPL *impl) { - if (impl->ref != (jobject)0) { - (*impl->jnienv)->ReleaseByteArrayElements( - impl->jnienv, impl->ref, - (jbyte *)impl->modify.data.data, 0); - (*impl->jnienv)->DeleteGlobalRef(impl->jnienv, impl->ref); - impl->ref = (jobject)0; - } -} %} /* Put a WiredTigerException on all wrapped methods. We'd like this @@ -2134,7 +1902,6 @@ REQUIRE_WRAP(WT_ASYNC_OP::get_id, __wt_async_op::get_id,getId) %rename(AsyncOp) __wt_async_op; %rename(Cursor) __wt_cursor; -%rename(Modify) __wt_modify; %rename(Session) __wt_session; %rename(Connection) __wt_connection; diff --git a/lang/python/wiredtiger.i b/lang/python/wiredtiger.i index 61c7fc62c43..7a297312bb8 100644 --- a/lang/python/wiredtiger.i +++ b/lang/python/wiredtiger.i @@ -151,74 +151,6 @@ from packing import pack, unpack } } -%typemap(in) WT_MODIFY * (int len, WT_MODIFY *modarray, int i) { - len = PyList_Size($input); - /* - * We allocate an extra cleared WT_MODIFY struct, the first - * entry will be used solely to transmit the array length to - * the call site. 
- */ - if (__wt_calloc_def(NULL, (size_t)len + 1, &modarray) != 0) - SWIG_exception_fail(SWIG_MemoryError, "WT calloc failed"); - modarray[0].size = (size_t)len; - for (i = 1; i <= len; i++) { - PyObject *dataobj, *modobj, *offsetobj, *sizeobj; - char *datadata; - long offset, size; - Py_ssize_t datasize; - - if ((modobj = PySequence_GetItem($input, i - 1)) == NULL) - SWIG_exception_fail(SWIG_IndexError, - "Modify sequence failed"); - - WT_GETATTR(dataobj, modobj, "data"); - if (PyString_AsStringAndSize(dataobj, &datadata, - &datasize) < 0) { - Py_DECREF(dataobj); - Py_DECREF(modobj); - SWIG_exception_fail(SWIG_AttributeError, - "Modify.data bad value"); - } - modarray[i].data.data = malloc(datasize); - memcpy(modarray[i].data.data, datadata, datasize); - modarray[i].data.size = datasize; - Py_DECREF(dataobj); - - WT_GETATTR(offsetobj, modobj, "offset"); - if ((offset = PyInt_AsLong(offsetobj)) < 0) { - Py_DECREF(offsetobj); - Py_DECREF(modobj); - SWIG_exception_fail(SWIG_RuntimeError, - "Modify.offset bad value"); - } - modarray[i].offset = offset; - Py_DECREF(offsetobj); - - WT_GETATTR(sizeobj, modobj, "size"); - if ((size = PyInt_AsLong(sizeobj)) < 0) { - Py_DECREF(sizeobj); - Py_DECREF(modobj); - SWIG_exception_fail(SWIG_RuntimeError, - "Modify.size bad value"); - } - modarray[i].size = size; - Py_DECREF(sizeobj); - Py_DECREF(modobj); - } - $1 = modarray; -} - -%typemap(freearg) WT_MODIFY * { - /* The WT_MODIFY arg is in position 2. Is there a better way? */ - WT_MODIFY *modarray = modarray2; - size_t i, len; - - len = modarray[0].size; - for (i = 1; i <= len; i++) - __wt_free(NULL, modarray[i].data.data); - __wt_free(NULL, modarray); -} - /* 64 bit typemaps. 
*/ %typemap(in) uint64_t { $1 = PyLong_AsUnsignedLongLong($input); @@ -312,13 +244,6 @@ static PyObject *wtError; static int sessionFreeHandler(WT_SESSION *session_arg); static int cursorFreeHandler(WT_CURSOR *cursor_arg); - -#define WT_GETATTR(var, parent, name) \ - do if ((var = PyObject_GetAttrString(parent, name)) == NULL) { \ - Py_DECREF(parent); \ - SWIG_exception_fail(SWIG_AttributeError, \ - "Modify." #name " get failed"); \ - } while(0) %} %init %{ @@ -448,8 +373,8 @@ retry: } %enddef -/* An API that returns a value that shouldn't be checked uses this. */ -%define ANY_OK(m) +/* Any API that returns an enum type uses this. */ +%define ENUM_OK(m) %exception m { $action } @@ -483,14 +408,12 @@ retry: %enddef EBUSY_OK(__wt_connection::async_new_op) -ANY_OK(__wt_async_op::get_type) +ENUM_OK(__wt_async_op::get_type) NOTFOUND_OK(__wt_cursor::next) NOTFOUND_OK(__wt_cursor::prev) NOTFOUND_OK(__wt_cursor::remove) NOTFOUND_OK(__wt_cursor::search) NOTFOUND_OK(__wt_cursor::update) -ANY_OK(__wt_modify::__wt_modify) -ANY_OK(__wt_modify::~__wt_modify) COMPARE_OK(__wt_cursor::_compare) COMPARE_OK(__wt_cursor::_equals) @@ -525,11 +448,6 @@ COMPARE_NOTFOUND_OK(__wt_cursor::_search_near) %ignore __wt_cursor::get_value; %ignore __wt_cursor::set_key; %ignore __wt_cursor::set_value; -%ignore __wt_cursor::modify(WT_CURSOR *, WT_MODIFY *, int); -%rename (modify) __wt_cursor::_modify; -%ignore __wt_modify::data; -%ignore __wt_modify::offset; -%ignore __wt_modify::size; /* Next, override methods that return integers via arguments. */ %ignore __wt_cursor::compare(WT_CURSOR *, WT_CURSOR *, int *); @@ -854,15 +772,6 @@ typedef int int_void; return (cursorFreeHandler($self)); } - /* - * modify: the size of the array was put into the first element by the - * typemap. 
- */ - int _modify(WT_MODIFY *list) { - int count = (int)list[0].size; - return (self->modify(self, &list[1], count)); - } - %pythoncode %{ def get_key(self): '''get_key(self) -> object @@ -961,21 +870,6 @@ typedef int int_void; %} }; -/* - * Support for WT_CURSOR.modify. The WT_MODIFY object is known to - * SWIG, but its attributes are regular Python attributes. - * We extract the attributes at the call site to WT_CURSOR.modify - * so we don't have to deal with managing Python objects references. - */ -%extend __wt_modify { -%pythoncode %{ - def __init__(self, data = '', offset = 0, size = 0): - self.data = data - self.offset = offset - self.size = size -%} -}; - %extend __wt_session { int _log_printf(const char *msg) { return self->log_printf(self, "%s", msg); @@ -1057,7 +951,6 @@ OVERRIDE_METHOD(__wt_session, WT_SESSION, log_printf, (self, msg)) %rename(AsyncOp) __wt_async_op; %rename(Cursor) __wt_cursor; -%rename(Modify) __wt_modify; %rename(Session) __wt_session; %rename(Connection) __wt_connection; @@ -1081,7 +974,7 @@ writeToPythonStream(const char *streamname, const char *message) written = NULL; arglist = arglist2 = NULL; msglen = strlen(message); - WT_RET(__wt_malloc(NULL, msglen + 2, &msg)); + msg = malloc(msglen + 2); strcpy(msg, message); strcpy(&msg[msglen], "\n"); @@ -1117,7 +1010,8 @@ err: Py_XDECREF(arglist2); /* Release python Global Interpreter Lock */ SWIG_PYTHON_THREAD_END_BLOCK; - __wt_free(NULL, msg); + if (msg) + free(msg); return (ret); } @@ -1338,3 +1232,4 @@ _rename_with_prefix('WT_STAT_CONN_', stat.conn) _rename_with_prefix('WT_STAT_DSRC_', stat.dsrc) del _rename_with_prefix %} + diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c index 7b92a58991d..091b9345713 100644 --- a/src/btree/bt_curnext.c +++ b/src/btree/bt_curnext.c @@ -142,7 +142,7 @@ new_page: if (cbt->ins == NULL) __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins)); if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL) continue; - if (upd->type == 
WT_UPDATE_DELETED) { + if (WT_UPDATE_DELETED_ISSET(upd)) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -205,7 +205,7 @@ new_page: /* Find the matching WT_COL slot. */ upd = cbt->ins == NULL ? NULL : __wt_txn_read(session, cbt->ins->upd); if (upd != NULL) { - if (upd->type == WT_UPDATE_DELETED) { + if (WT_UPDATE_DELETED_ISSET(upd)) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -325,7 +325,7 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage) new_insert: if ((ins = cbt->ins) != NULL) { if ((upd = __wt_txn_read(session, ins->upd)) == NULL) continue; - if (upd->type == WT_UPDATE_DELETED) { + if (WT_UPDATE_DELETED_ISSET(upd)) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -358,7 +358,7 @@ new_insert: if ((ins = cbt->ins) != NULL) { cbt->slot = cbt->row_iteration_slot / 2 - 1; rip = &page->pg_row[cbt->slot]; upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip)); - if (upd != NULL && upd->type == WT_UPDATE_DELETED) { + if (upd != NULL && WT_UPDATE_DELETED_ISSET(upd)) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c index 55b5095fe91..4f0fa77d3e6 100644 --- a/src/btree/bt_curprev.c +++ b/src/btree/bt_curprev.c @@ -288,7 +288,7 @@ new_page: if (cbt->ins == NULL) __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins)); if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL) continue; - if (upd->type == WT_UPDATE_DELETED) { + if (WT_UPDATE_DELETED_ISSET(upd)) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -352,7 +352,7 @@ new_page: if (cbt->recno < cbt->ref->ref_recno) upd = cbt->ins == NULL ? 
NULL : __wt_txn_read(session, cbt->ins->upd); if (upd != NULL) { - if (upd->type == WT_UPDATE_DELETED) { + if (WT_UPDATE_DELETED_ISSET(upd)) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -482,7 +482,7 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage) new_insert: if ((ins = cbt->ins) != NULL) { if ((upd = __wt_txn_read(session, ins->upd)) == NULL) continue; - if (upd->type == WT_UPDATE_DELETED) { + if (WT_UPDATE_DELETED_ISSET(upd)) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -517,7 +517,7 @@ new_insert: if ((ins = cbt->ins) != NULL) { cbt->slot = cbt->row_iteration_slot / 2 - 1; rip = &page->pg_row[cbt->slot]; upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip)); - if (upd != NULL && upd->type == WT_UPDATE_DELETED) { + if (upd != NULL && WT_UPDATE_DELETED_ISSET(upd)) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 7e415150cc5..664545ee3a0 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -224,7 +224,7 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) */ if (cbt->ins != NULL && (upd = __wt_txn_read(session, cbt->ins->upd)) != NULL) { - if (upd->type == WT_UPDATE_DELETED) + if (WT_UPDATE_DELETED_ISSET(upd)) return (false); if (updp != NULL) *updp = upd; @@ -297,7 +297,7 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) page->modify->mod_row_update != NULL && (upd = __wt_txn_read(session, page->modify->mod_row_update[cbt->slot])) != NULL) { - if (upd->type == WT_UPDATE_DELETED) + if (WT_UPDATE_DELETED_ISSET(upd)) return (false); if (updp != NULL) *updp = upd; @@ -342,11 +342,11 @@ __cursor_row_search( * Column-store delete, insert, and update from an application cursor. 
*/ static inline int -__cursor_col_modify( - WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type) +__cursor_col_modify(WT_SESSION_IMPL *session, + WT_CURSOR_BTREE *cbt, bool is_remove, bool is_reserve) { return (__wt_col_modify(session, cbt, - cbt->iface.recno, &cbt->iface.value, NULL, modify_type)); + cbt->iface.recno, &cbt->iface.value, NULL, is_remove, is_reserve)); } /* @@ -354,11 +354,11 @@ __cursor_col_modify( * Row-store insert, update and delete from an application cursor. */ static inline int -__cursor_row_modify( - WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type) +__cursor_row_modify(WT_SESSION_IMPL *session, + WT_CURSOR_BTREE *cbt, bool is_remove, bool is_reserve) { return (__wt_row_modify(session, cbt, - &cbt->iface.key, &cbt->iface.value, NULL, modify_type)); + &cbt->iface.key, &cbt->iface.value, NULL, is_remove, is_reserve)); } /* @@ -662,8 +662,8 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) */ cbt->compare = 0; ret = btree->type == BTREE_ROW ? 
- __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD) : - __cursor_col_modify(session, cbt, WT_UPDATE_STANDARD); + __cursor_row_modify(session, cbt, false, false) : + __cursor_col_modify(session, cbt, false, false); if (ret == 0) goto done; @@ -700,7 +700,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) WT_ERR(WT_DUPLICATE_KEY); - ret = __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD); + ret = __cursor_row_modify(session, cbt, false, false); } else { /* * Optionally insert a new record (ignoring the application's @@ -723,7 +723,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); (cbt->compare != 0 && __cursor_fix_implicit(btree, cbt)))) WT_ERR(WT_DUPLICATE_KEY); - WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD)); + WT_ERR(__cursor_col_modify(session, cbt, false, false)); if (append_key) cbt->iface.recno = cbt->recno; @@ -881,8 +881,8 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt) */ cbt->compare = 0; ret = btree->type == BTREE_ROW ? - __cursor_row_modify(session, cbt, WT_UPDATE_DELETED) : - __cursor_col_modify(session, cbt, WT_UPDATE_DELETED); + __cursor_row_modify(session, cbt, true, false) : + __cursor_col_modify(session, cbt, true, false); if (ret == 0) goto done; @@ -921,7 +921,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) WT_ERR(WT_NOTFOUND); - ret = __cursor_row_modify(session, cbt, WT_UPDATE_DELETED); + ret = __cursor_row_modify(session, cbt, true, false); } else { WT_ERR(__cursor_col_search(session, cbt, NULL)); @@ -948,8 +948,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); */ cbt->recno = cursor->recno; } else - ret = __cursor_col_modify( - session, cbt, WT_UPDATE_DELETED); + ret = __cursor_col_modify(session, cbt, true, false); } err: if (ret == WT_RESTART) { @@ -987,7 +986,7 @@ done: /* * Update a record in the tree. 
*/ static int -__btcur_update(WT_CURSOR_BTREE *cbt, u_int modify_type) +__btcur_update(WT_CURSOR_BTREE *cbt, bool is_reserve) { WT_BTREE *btree; WT_CURFILE_STATE state; @@ -999,6 +998,15 @@ __btcur_update(WT_CURSOR_BTREE *cbt, u_int modify_type) cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; + WT_STAT_CONN_INCR(session, cursor_update); + WT_STAT_DATA_INCR(session, cursor_update); + WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->value.size); + + if (btree->type == BTREE_ROW) + WT_RET(__cursor_size_chk(session, &cursor->key)); + if (!is_reserve) + WT_RET(__cursor_size_chk(session, &cursor->value)); + /* It's no longer possible to bulk-load into the tree. */ __cursor_disable_bulk(session, btree); @@ -1022,8 +1030,8 @@ __btcur_update(WT_CURSOR_BTREE *cbt, u_int modify_type) */ cbt->compare = 0; ret = btree->type == BTREE_ROW ? - __cursor_row_modify(session, cbt, modify_type) : - __cursor_col_modify(session, cbt, modify_type); + __cursor_row_modify(session, cbt, false, is_reserve) : + __cursor_col_modify(session, cbt, false, is_reserve); if (ret == 0) goto done; @@ -1061,7 +1069,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) WT_ERR(WT_NOTFOUND); } - ret = __cursor_row_modify(session, cbt, modify_type); + ret = __cursor_row_modify(session, cbt, false, is_reserve); } else { WT_ERR(__cursor_col_search(session, cbt, NULL)); @@ -1080,7 +1088,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); !__cursor_fix_implicit(btree, cbt)) WT_ERR(WT_NOTFOUND); } - ret = __cursor_col_modify(session, cbt, modify_type); + ret = __cursor_col_modify(session, cbt, false, is_reserve); } err: if (ret == WT_RESTART) { @@ -1098,7 +1106,7 @@ err: if (ret == WT_RESTART) { * pointer to the modify function's allocated update structure. 
*/ done: if (ret == 0) { - if (modify_type == WT_UPDATE_RESERVED) { + if (is_reserve) { F_CLR(cursor, WT_CURSTD_VALUE_SET); WT_TRET(__wt_key_return(session, cbt)); } else @@ -1123,19 +1131,14 @@ __wt_btcur_reserve(WT_CURSOR_BTREE *cbt) { WT_CURSOR *cursor; WT_DECL_RET; - WT_SESSION_IMPL *session; bool overwrite; cursor = &cbt->iface; - session = (WT_SESSION_IMPL *)cursor->session; - - WT_STAT_CONN_INCR(session, cursor_reserve); - WT_STAT_DATA_INCR(session, cursor_reserve); /* WT_CURSOR.reserve is update-without-overwrite and a special value. */ overwrite = F_ISSET(cursor, WT_CURSTD_OVERWRITE); F_CLR(cursor, WT_CURSTD_OVERWRITE); - ret = __btcur_update(cbt, WT_UPDATE_RESERVED); + ret = __btcur_update(cbt, true); if (overwrite) F_SET(cursor, WT_CURSTD_OVERWRITE); return (ret); @@ -1148,23 +1151,7 @@ __wt_btcur_reserve(WT_CURSOR_BTREE *cbt) int __wt_btcur_update(WT_CURSOR_BTREE *cbt) { - WT_BTREE *btree; - WT_CURSOR *cursor; - WT_SESSION_IMPL *session; - - btree = cbt->btree; - cursor = &cbt->iface; - session = (WT_SESSION_IMPL *)cursor->session; - - WT_STAT_CONN_INCR(session, cursor_update); - WT_STAT_DATA_INCR(session, cursor_update); - WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->value.size); - - if (btree->type == BTREE_ROW) - WT_RET(__cursor_size_chk(session, &cursor->key)); - WT_RET(__cursor_size_chk(session, &cursor->value)); - - return (__btcur_update(cbt, WT_UPDATE_STANDARD)); + return (__btcur_update(cbt, false)); } /* @@ -1287,7 +1274,7 @@ __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp) static int __cursor_truncate(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, - int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int)) + int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, bool, bool)) { WT_DECL_RET; @@ -1315,7 +1302,7 @@ retry: WT_RET(__wt_btcur_search(start)); F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); for (;;) { - if ((ret = rmfunc(session, start, 
WT_UPDATE_DELETED)) != 0) + if ((ret = rmfunc(session, start, true, false)) != 0) break; if (stop != NULL && __cursor_equals(start, stop)) @@ -1342,7 +1329,7 @@ retry: WT_RET(__wt_btcur_search(start)); static int __cursor_truncate_fix(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, - int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int)) + int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, bool, bool)) { WT_DECL_RET; const uint8_t *value; @@ -1373,7 +1360,7 @@ retry: WT_RET(__wt_btcur_search(start)); for (;;) { value = (const uint8_t *)start->iface.value.data; if (*value != 0 && - (ret = rmfunc(session, start, WT_UPDATE_DELETED)) != 0) + (ret = rmfunc(session, start, true, false)) != 0) break; if (stop != NULL && __cursor_equals(start, stop)) diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index c3f98a98ec5..538c363a864 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -985,9 +985,9 @@ static int __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte) { for (; upd != NULL; upd = upd->next) - if (upd->type == WT_UPDATE_DELETED) + if (WT_UPDATE_DELETED_ISSET(upd)) WT_RET(ds->f(ds, "\tvalue {deleted}\n")); - else if (upd->type == WT_UPDATE_RESERVED) + else if (WT_UPDATE_RESERVED_ISSET(upd)) WT_RET(ds->f(ds, "\tvalue {reserved}\n")); else if (hexbyte) { WT_RET(ds->f(ds, "\t{")); diff --git a/src/btree/bt_delete.c b/src/btree/bt_delete.c index 4a88b672d47..12c3b044fda 100644 --- a/src/btree/bt_delete.c +++ b/src/btree/bt_delete.c @@ -333,7 +333,7 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) */ for (i = 0, size = 0; i < page->entries; ++i) { WT_ERR(__wt_calloc_one(session, &upd)); - upd->type = WT_UPDATE_DELETED; + WT_UPDATE_DELETED_SET(upd); if (page_del == NULL) upd->txnid = WT_TXN_NONE; /* Globally visible */ diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index e6a0f53ab40..72a69e8591c 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -90,8 +90,7 @@ 
__col_instantiate(WT_SESSION_IMPL *session, { /* Search the page and add updates. */ WT_RET(__wt_col_search(session, recno, ref, cbt)); - WT_RET(__wt_col_modify( - session, cbt, recno, NULL, upd, WT_UPDATE_STANDARD)); + WT_RET(__wt_col_modify(session, cbt, recno, NULL, upd, false, false)); return (0); } @@ -105,8 +104,7 @@ __row_instantiate(WT_SESSION_IMPL *session, { /* Search the page and add updates. */ WT_RET(__wt_row_search(session, key, ref, cbt, true)); - WT_RET(__wt_row_modify( - session, cbt, key, NULL, upd, WT_UPDATE_STANDARD)); + WT_RET(__wt_row_modify(session, cbt, key, NULL, upd, false, false)); return (0); } @@ -129,8 +127,7 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_UPDATE *first_upd, *last_upd, *upd; size_t incr, total_incr; uint64_t current_recno, las_counter, las_txnid, recno, upd_txnid; - uint32_t las_id, session_flags; - uint8_t upd_type; + uint32_t las_id, upd_size, session_flags; int exact; const uint8_t *p; @@ -191,10 +188,9 @@ __las_page_instantiate(WT_SESSION_IMPL *session, /* Allocate the WT_UPDATE structure. */ WT_ERR(cursor->get_value( - cursor, &upd_txnid, &upd_type, las_value)); - WT_ERR(__wt_update_alloc(session, las_value, &upd, &incr, - upd_type == WT_UPDATE_DELETED ? - WT_UPDATE_DELETED : WT_UPDATE_STANDARD)); + cursor, &upd_txnid, &upd_size, las_value)); + WT_ERR(__wt_update_alloc(session, las_value, + &upd, &incr, upd_size == WT_UPDATE_DELETED_VALUE, false)); total_incr += incr; upd->txnid = upd_txnid; diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index c2c56a18131..23210a556da 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -1424,8 +1424,8 @@ __split_multi_inmem( WT_ERR(__wt_col_search(session, recno, ref, &cbt)); /* Apply the modification. */ - WT_ERR(__wt_col_modify(session, - &cbt, recno, NULL, upd, WT_UPDATE_STANDARD)); + WT_ERR(__wt_col_modify( + session, &cbt, recno, NULL, upd, false, false)); break; case WT_PAGE_ROW_LEAF: /* Build a key. 
*/ @@ -1447,7 +1447,7 @@ __split_multi_inmem( /* Apply the modification. */ WT_ERR(__wt_row_modify( - session, &cbt, key, NULL, upd, WT_UPDATE_STANDARD)); + session, &cbt, key, NULL, upd, false, false)); break; WT_ILLEGAL_VALUE_ERR(session); } diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c index e3b9bbced48..2b9c9bef8a2 100644 --- a/src/btree/bt_stat.c +++ b/src/btree/bt_stat.c @@ -178,9 +178,9 @@ __stat_page_col_var( */ WT_SKIP_FOREACH(ins, WT_COL_UPDATE(page, cip)) { upd = ins->upd; - if (upd->type == WT_UPDATE_RESERVED) + if (WT_UPDATE_RESERVED_ISSET(upd)) continue; - if (upd->type == WT_UPDATE_DELETED) { + if (WT_UPDATE_DELETED_ISSET(upd)) { if (!orig_deleted) { ++deleted_cnt; --entry_cnt; @@ -195,9 +195,9 @@ __stat_page_col_var( /* Walk any append list. */ WT_SKIP_FOREACH(ins, WT_COL_APPEND(page)) { - if (ins->upd->type == WT_UPDATE_RESERVED) + if (WT_UPDATE_RESERVED_ISSET(ins->upd)) continue; - if (ins->upd->type == WT_UPDATE_DELETED) + if (WT_UPDATE_DELETED_ISSET(ins->upd)) ++deleted_cnt; else ++entry_cnt; @@ -268,8 +268,8 @@ __stat_page_row_leaf( * key on the page. */ WT_SKIP_FOREACH(ins, WT_ROW_INSERT_SMALLEST(page)) - if (ins->upd->type != WT_UPDATE_DELETED && - ins->upd->type != WT_UPDATE_RESERVED) + if (!WT_UPDATE_DELETED_ISSET(ins->upd) && + !WT_UPDATE_RESERVED_ISSET(ins->upd)) ++entry_cnt; /* @@ -279,8 +279,8 @@ __stat_page_row_leaf( WT_ROW_FOREACH(page, rip, i) { upd = WT_ROW_UPDATE(page, rip); if (upd == NULL || - (upd->type != WT_UPDATE_DELETED && - upd->type != WT_UPDATE_RESERVED)) + (!WT_UPDATE_DELETED_ISSET(upd) && + !WT_UPDATE_RESERVED_ISSET(upd))) ++entry_cnt; if (upd == NULL && (cell = __wt_row_leaf_value_cell(page, rip, NULL)) != NULL && @@ -289,8 +289,8 @@ __stat_page_row_leaf( /* Walk K/V pairs inserted after the on-page K/V pair. 
*/ WT_SKIP_FOREACH(ins, WT_ROW_INSERT(page, rip)) - if (ins->upd->type != WT_UPDATE_DELETED && - ins->upd->type != WT_UPDATE_RESERVED) + if (!WT_UPDATE_DELETED_ISSET(ins->upd) && + !WT_UPDATE_RESERVED_ISSET(ins->upd)) ++entry_cnt; } diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c index c256f03a612..b45f369f1c2 100644 --- a/src/btree/col_modify.c +++ b/src/btree/col_modify.c @@ -17,7 +17,8 @@ static int __col_insert_alloc( */ int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, - uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type) + uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, + bool is_remove, bool is_reserve) { static const WT_ITEM col_fix_remove = { "", 1, NULL, 0, 0 }; WT_BTREE *btree; @@ -37,15 +38,13 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, upd = upd_arg; append = logged = false; - if (modify_type == WT_UPDATE_DELETED || - modify_type == WT_UPDATE_RESERVED) { + if (is_remove || is_reserve) { /* * Fixed-size column-store doesn't have on-page deleted values, * it's a nul byte. */ - if (modify_type == WT_UPDATE_DELETED && - btree->type == BTREE_COL_FIX) { - modify_type = WT_UPDATE_STANDARD; + if (is_remove && btree->type == BTREE_COL_FIX) { + is_remove = false; value = &col_fix_remove; } } else { @@ -90,7 +89,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, /* Allocate a WT_UPDATE structure and transaction ID. 
*/ WT_ERR(__wt_update_alloc(session, - value, &upd, &upd_size, modify_type)); + value, &upd, &upd_size, is_remove, is_reserve)); WT_ERR(__wt_txn_modify(session, upd)); logged = true; @@ -151,7 +150,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, if (upd_arg == NULL) { WT_ERR(__wt_update_alloc(session, - value, &upd, &upd_size, modify_type)); + value, &upd, &upd_size, is_remove, is_reserve)); WT_ERR(__wt_txn_modify(session, upd)); logged = true; @@ -196,7 +195,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, } /* If the update was successful, add it to the in-memory log. */ - if (logged && modify_type != WT_UPDATE_RESERVED) + if (logged && !is_reserve) WT_ERR(__wt_txn_log_op(session, cbt)); if (0) { diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c index 2bf3c2f29bc..d3b087f92c6 100644 --- a/src/btree/row_modify.c +++ b/src/btree/row_modify.c @@ -48,7 +48,7 @@ __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, - WT_UPDATE *upd_arg, u_int modify_type) + WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) { WT_DECL_RET; WT_INSERT *ins; @@ -97,7 +97,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, /* Allocate a WT_UPDATE structure and transaction ID. 
*/ WT_ERR(__wt_update_alloc(session, - value, &upd, &upd_size, modify_type)); + value, &upd, &upd_size, is_remove, is_reserve)); WT_ERR(__wt_txn_modify(session, upd)); logged = true; @@ -168,7 +168,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, if (upd_arg == NULL) { WT_ERR(__wt_update_alloc(session, - value, &upd, &upd_size, modify_type)); + value, &upd, &upd_size, is_remove, is_reserve)); WT_ERR(__wt_txn_modify(session, upd)); logged = true; @@ -207,7 +207,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, &ins, ins_size, skipdepth)); } - if (logged && modify_type != WT_UPDATE_RESERVED) + if (logged && !is_reserve) WT_ERR(__wt_txn_log_op(session, cbt)); if (0) { @@ -261,7 +261,7 @@ __wt_row_insert_alloc(WT_SESSION_IMPL *session, */ int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, - WT_UPDATE **updp, size_t *sizep, u_int modify_type) + WT_UPDATE **updp, size_t *sizep, bool is_remove, bool is_reserve) { WT_UPDATE *upd; @@ -271,10 +271,13 @@ __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, * Allocate the WT_UPDATE structure and room for the value, then copy * the value into place. 
*/ - if (modify_type == WT_UPDATE_DELETED || - modify_type == WT_UPDATE_RESERVED) + if (is_remove || is_reserve) { WT_RET(__wt_calloc(session, 1, sizeof(WT_UPDATE), &upd)); - else { + if (is_remove) + WT_UPDATE_DELETED_SET(upd); + if (is_reserve) + WT_UPDATE_RESERVED_SET(upd); + } else { WT_RET(__wt_calloc( session, 1, sizeof(WT_UPDATE) + value->size, &upd)); if (value->size != 0) { @@ -282,7 +285,6 @@ __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, memcpy(WT_UPDATE_DATA(upd), value->data, value->size); } } - upd->type = (uint8_t)modify_type; *updp = upd; *sizep = WT_UPDATE_MEMSIZE(upd); diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c index 60750b88900..a30cb6f0e17 100644 --- a/src/cursor/cur_backup.c +++ b/src/cursor/cur_backup.c @@ -119,7 +119,6 @@ __wt_curbackup_open(WT_SESSION_IMPL *session, __wt_cursor_notsup, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __wt_cursor_notsup, /* insert */ - __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_config.c b/src/cursor/cur_config.c index 6c198315e33..a0b87b2b3c6 100644 --- a/src/cursor/cur_config.c +++ b/src/cursor/cur_config.c @@ -39,7 +39,6 @@ __wt_curconfig_open(WT_SESSION_IMPL *session, __wt_cursor_notsup, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __wt_cursor_notsup, /* insert */ - __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_ds.c b/src/cursor/cur_ds.c index 4930bcdda13..e40cb30dd53 100644 --- a/src/cursor/cur_ds.c +++ b/src/cursor/cur_ds.c @@ -458,7 +458,6 @@ __wt_curds_open( __curds_search, /* search */ __curds_search_near, /* search-near */ __curds_insert, /* insert */ - __wt_cursor_modify_notsup, /* modify */ __curds_update, /* update */ __curds_remove, /* remove */ __wt_cursor_notsup, /* reserve */ diff 
--git a/src/cursor/cur_dump.c b/src/cursor/cur_dump.c index 3e90d321db6..73328da6246 100644 --- a/src/cursor/cur_dump.c +++ b/src/cursor/cur_dump.c @@ -369,7 +369,6 @@ __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) __curdump_search, /* search */ __curdump_search_near, /* search-near */ __curdump_insert, /* insert */ - __wt_cursor_modify_notsup, /* modify */ __curdump_update, /* update */ __curdump_remove, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index d6cf308077d..c43826799cf 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -352,11 +352,6 @@ __curfile_reserve(WT_CURSOR *cursor) WT_ERR(__wt_btcur_reserve(cbt)); - /* - * Reserve maintains a position and key, which doesn't match the library - * API, where reserve maintains a value. Fix the API by searching after - * each successful reserve operation. - */ WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0); @@ -435,7 +430,6 @@ __curfile_create(WT_SESSION_IMPL *session, __curfile_search, /* search */ __curfile_search_near, /* search-near */ __curfile_insert, /* insert */ - __wt_cursor_modify_notsup, /* modify */ __curfile_update, /* update */ __curfile_remove, /* remove */ __curfile_reserve, /* reserve */ diff --git a/src/cursor/cur_index.c b/src/cursor/cur_index.c index e8fcb1b2702..fcf00e4fa03 100644 --- a/src/cursor/cur_index.c +++ b/src/cursor/cur_index.c @@ -449,7 +449,6 @@ __wt_curindex_open(WT_SESSION_IMPL *session, __curindex_search, /* search */ __curindex_search_near, /* search-near */ __wt_cursor_notsup, /* insert */ - __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index e4ccb90139e..cebf8a7fd6e 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ 
-591,7 +591,6 @@ __curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, __wt_cursor_notsup, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __curjoin_extract_insert, /* insert */ - __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ @@ -1294,7 +1293,6 @@ __wt_curjoin_open(WT_SESSION_IMPL *session, __wt_cursor_notsup, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __wt_cursor_notsup, /* insert */ - __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_log.c b/src/cursor/cur_log.c index 38e9d4a1784..c8dc44bb392 100644 --- a/src/cursor/cur_log.c +++ b/src/cursor/cur_log.c @@ -342,7 +342,6 @@ __wt_curlog_open(WT_SESSION_IMPL *session, __curlog_search, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __wt_cursor_notsup, /* insert */ - __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_metadata.c b/src/cursor/cur_metadata.c index d9aeed1fccd..9a38996d4ce 100644 --- a/src/cursor/cur_metadata.c +++ b/src/cursor/cur_metadata.c @@ -550,7 +550,6 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session, __curmetadata_search, /* search */ __curmetadata_search_near, /* search-near */ __curmetadata_insert, /* insert */ - __wt_cursor_modify_notsup, /* modify */ __curmetadata_update, /* update */ __curmetadata_remove, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c index a1ec1d75918..0bfe5679677 100644 --- a/src/cursor/cur_stat.c +++ b/src/cursor/cur_stat.c @@ -576,7 +576,6 @@ __wt_curstat_open(WT_SESSION_IMPL *session, __curstat_search, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __wt_cursor_notsup, /* insert */ - 
__wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c index f52d60fde01..e42c5c7766e 100644 --- a/src/cursor/cur_std.c +++ b/src/cursor/cur_std.c @@ -89,19 +89,6 @@ __wt_cursor_equals_notsup(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) return (__wt_cursor_notsup(cursor)); } -/* - * __wt_cursor_modify_notsup -- - * Unsupported cursor modify. - */ -int -__wt_cursor_modify_notsup(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) -{ - WT_UNUSED(entries); - WT_UNUSED(nentries); - - return (__wt_cursor_notsup(cursor)); -} - /* * __wt_cursor_search_near_notsup -- * Unsupported cursor search-near. @@ -594,100 +581,6 @@ err: API_END(session, ret); return (ret); } -/* - * __cursor_modify -- - * WT_CURSOR->modify default implementation. - */ -static int -__cursor_modify(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) -{ - WT_DECL_RET; - WT_SESSION_IMPL *session; - WT_DECL_ITEM(ta); - WT_DECL_ITEM(tb); - WT_DECL_ITEM(tmp); - size_t len, size; - int i; - - CURSOR_UPDATE_API_CALL(cursor, session, modify, NULL); - WT_ERR(__cursor_checkkey(cursor)); - - /* Check for a rational modify vector count. */ - if (nentries <= 0) - WT_ERR_MSG( - session, EINVAL, "Illegal modify vector of %d", nentries); - - WT_STAT_CONN_INCR(session, cursor_modify); - WT_STAT_DATA_INCR(session, cursor_modify); - - /* Acquire position and value. */ - WT_ERR(cursor->search(cursor)); - - /* - * Process the entries to figure out how large a buffer we need. This is - * a bit pessimistic because we're ignoring replacement bytes, but it's - * a simpler calculation. - */ - for (size = cursor->value.size, i = 0; i < nentries; ++i) { - if (entries[i].offset >= size) - size = entries[i].offset; - size += entries[i].data.size; - } - - /* Allocate a pair of buffers. 
*/ - WT_ERR(__wt_scr_alloc(session, size, &ta)); - WT_ERR(__wt_scr_alloc(session, size, &tb)); - - /* Apply the change vector to the value. */ - WT_ERR(__wt_buf_set( - session, ta, cursor->value.data, cursor->value.size)); - for (i = 0; i < nentries; ++i) { - /* Take leading bytes from the original, plus any gap bytes. */ - if (entries[i].offset >= ta->size) { - memcpy(tb->mem, ta->mem, ta->size); - if (entries[i].offset > ta->size) - memset((uint8_t *)tb->mem + ta->size, - '\0', entries[i].offset - ta->size); - } else - if (entries[i].offset > 0) - memcpy(tb->mem, ta->mem, entries[i].offset); - tb->size = entries[i].offset; - - /* Take replacement bytes. */ - if (entries[i].data.size > 0) { - memcpy((uint8_t *)tb->mem + tb->size, - entries[i].data.data, entries[i].data.size); - tb->size += entries[i].data.size; - } - - /* Take trailing bytes from the original. */ - len = entries[i].offset + entries[i].size; - if (ta->size > len) { - memcpy((uint8_t *)tb->mem + tb->size, - (uint8_t *)ta->mem + len, ta->size - len); - tb->size += ta->size - len; - } - WT_ASSERT(session, tb->size <= size); - - tmp = ta; - ta = tb; - tb = tmp; - } - - /* Set the cursor's value. */ - ta->data = ta->mem; - cursor->set_value(cursor, ta); - - /* We know both key and value are set, "overwrite" doesn't matter. */ - ret = cursor->update(cursor); - -err: __wt_scr_free(session, &ta); - __wt_scr_free(session, &tb); - - CURSOR_UPDATE_API_END(session, ret); - return (ret); -} - /* * __wt_cursor_reconfigure -- * Set runtime-configurable settings. @@ -863,14 +756,6 @@ __wt_cursor_init(WT_CURSOR *cursor, if (cval.val != 0) F_SET(cursor, WT_CURSTD_RAW); - /* - * WT_CURSOR.modify supported on 'u' value formats, but may have been - * already initialized. 
- */ - if (WT_STREQ(cursor->value_format, "u") && - cursor->modify == __wt_cursor_modify_notsup) - cursor->modify = __cursor_modify; - /* * Cursors that are internal to some other cursor (such as file cursors * inside a table cursor) should be closed after the containing cursor. diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c index 7a04033e9a4..c6514aaac58 100644 --- a/src/cursor/cur_table.c +++ b/src/cursor/cur_table.c @@ -91,7 +91,6 @@ __wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx, __wt_cursor_notsup, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __curextract_insert, /* insert */ - __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ @@ -950,7 +949,6 @@ __wt_curtable_open(WT_SESSION_IMPL *session, __curtable_search, /* search */ __curtable_search_near, /* search-near */ __curtable_insert, /* insert */ - __wt_cursor_modify_notsup, /* modify */ __curtable_update, /* update */ __curtable_remove, /* remove */ __curtable_reserve, /* reserve */ diff --git a/src/docs/Doxyfile b/src/docs/Doxyfile index e7382e2bc5e..3d8c46962f1 100644 --- a/src/docs/Doxyfile +++ b/src/docs/Doxyfile @@ -1582,7 +1582,6 @@ PREDEFINED = DOXYGEN \ __wt_file_system:=WT_FILE_SYSTEM \ __wt_item:=WT_ITEM \ __wt_lsn:=WT_LSN \ - __wt_modify:=WT_MODIFY \ __wt_session:=WT_SESSION \ __wt_txn_notify:=WT_TXN_NOTIFY \ WT_HANDLE_CLOSED(x):=x \ diff --git a/src/include/btmem.h b/src/include/btmem.h index 4e8d3c05d7d..6755db81007 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -208,7 +208,7 @@ struct __wt_ovfl_txnc { */ #define WT_LAS_FORMAT \ "key_format=" WT_UNCHECKED_STRING(IuQQu) \ - ",value_format=" WT_UNCHECKED_STRING(QBu) + ",value_format=" WT_UNCHECKED_STRING(QIu) /* * WT_PAGE_MODIFY -- @@ -809,11 +809,11 @@ struct __wt_row { /* On-page key, on-page cell, or off-page WT_IKEY */ * Walk the entries of an in-memory row-store leaf page. 
*/ #define WT_ROW_FOREACH(page, rip, i) \ - for ((i) = (page)->entries, \ + for ((i) = (page)->entries, \ (rip) = (page)->pg_row; (i) > 0; ++(rip), --(i)) #define WT_ROW_FOREACH_REVERSE(page, rip, i) \ - for ((i) = (page)->entries, \ - (rip) = (page)->pg_row + ((page)->entries - 1); \ + for ((i) = (page)->entries, \ + (rip) = (page)->pg_row + ((page)->entries - 1); \ (i) > 0; --(rip), --(i)) /* @@ -861,7 +861,7 @@ struct __wt_col { * Walk the entries of variable-length column-store leaf page. */ #define WT_COL_FOREACH(page, cip, i) \ - for ((i) = (page)->entries, \ + for ((i) = (page)->entries, \ (cip) = (page)->pg_var; (i) > 0; ++(cip), --(i)) /* @@ -908,16 +908,23 @@ struct __wt_ikey { * list. */ WT_PACKED_STRUCT_BEGIN(__wt_update) - uint64_t txnid; /* transaction */ + uint64_t txnid; /* update transaction */ WT_UPDATE *next; /* forward-linked list */ - uint32_t size; /* data length */ + /* + * Use the maximum size and maximum size-1 as is-deleted and is-reserved + * flags (which means we can't store 4GB objects), instead of increasing + * the size of this structure for a flag bit. + */ +#define WT_UPDATE_DELETED_VALUE UINT32_MAX +#define WT_UPDATE_DELETED_SET(u) ((u)->size = WT_UPDATE_DELETED_VALUE) +#define WT_UPDATE_DELETED_ISSET(u) ((u)->size == WT_UPDATE_DELETED_VALUE) -#define WT_UPDATE_STANDARD 0 -#define WT_UPDATE_DELETED 1 -#define WT_UPDATE_RESERVED 2 - uint8_t type; /* type (one byte to conserve memory) */ +#define WT_UPDATE_RESERVED_VALUE (UINT32_MAX - 1) +#define WT_UPDATE_RESERVED_SET(u) ((u)->size = WT_UPDATE_RESERVED_VALUE) +#define WT_UPDATE_RESERVED_ISSET(u) ((u)->size == WT_UPDATE_RESERVED_VALUE) + uint32_t size; /* update length */ /* The untyped value immediately follows the WT_UPDATE structure. */ #define WT_UPDATE_DATA(upd) \ @@ -929,13 +936,9 @@ WT_PACKED_STRUCT_BEGIN(__wt_update) * cache overhead calculation. 
*/ #define WT_UPDATE_MEMSIZE(upd) \ - WT_ALIGN(sizeof(WT_UPDATE) + (upd)->size, 32) + WT_ALIGN(sizeof(WT_UPDATE) + (WT_UPDATE_DELETED_ISSET(upd) || \ + WT_UPDATE_RESERVED_ISSET(upd) ? 0 : (upd)->size), 32) WT_PACKED_STRUCT_END -/* - * WT_UPDATE_SIZE is the expected structure size -- we verify the build to - * ensure the compiler hasn't inserted padding. - */ -#define WT_UPDATE_SIZE 21 /* * WT_INSERT -- diff --git a/src/include/cursor.h b/src/include/cursor.h index 8d2f2c80c2a..b044329fbfe 100644 --- a/src/include/cursor.h +++ b/src/include/cursor.h @@ -22,7 +22,6 @@ search, \ search_near, \ insert, \ - modify, \ update, \ remove, \ reserve, \ @@ -45,7 +44,6 @@ search, \ search_near, \ insert, \ - modify, \ update, \ remove, \ reserve, \ diff --git a/src/include/extern.h b/src/include/extern.h index 01c21b188c0..a3ce0f3746f 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -180,7 +180,7 @@ extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *b extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tree_walk_skip( WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_leaf_key_copy( WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -189,9 +189,9 @@ extern int __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, c extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_ikey(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, const WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp, size_t *sizep, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp, size_t *sizep, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd); extern 
void __wt_update_obsolete_free( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd); extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -314,7 +314,6 @@ extern void __wt_cursor_set_key_notsup(WT_CURSOR *cursor, ...); extern void __wt_cursor_set_value_notsup(WT_CURSOR *cursor, ...); extern int __wt_cursor_compare_notsup(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_cursor_equals_notsup(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_cursor_modify_notsup(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_cursor_search_near_notsup(WT_CURSOR *cursor, int *exact) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_cursor_reconfigure_notsup(WT_CURSOR *cursor, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_cursor_set_notsup(WT_CURSOR *cursor); diff --git a/src/include/log.h b/src/include/log.h index e7bc28cd220..f80514a3546 100644 --- a/src/include/log.h +++ b/src/include/log.h @@ -130,7 +130,7 @@ union __wt_lsn { #define WT_LOG_SLOT_FLAGS(state) ((state) & WT_LOG_SLOT_MASK_ON) #define WT_LOG_SLOT_JOINED(state) (((state) & WT_LOG_SLOT_MASK_OFF) >> 32) #define WT_LOG_SLOT_JOINED_BUFFERED(state) \ - (WT_LOG_SLOT_JOINED(state) & \ + (WT_LOG_SLOT_JOINED(state) & \ (WT_LOG_SLOT_UNBUFFERED - 1)) #define WT_LOG_SLOT_JOIN_REL(j, r, s) (((j) << 32) + (r) + (s)) #define WT_LOG_SLOT_RELEASED(state) ((int64_t)(int32_t)(state)) diff --git a/src/include/lsm.h b/src/include/lsm.h index f8d0f480cbb..08313438eb8 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -240,11 +240,11 @@ struct __wt_lsm_tree { * area, copying them into place when a statistics cursor is created. 
*/ #define WT_LSM_TREE_STAT_INCR(session, fld) do { \ - if (WT_STAT_ENABLED(session)) \ + if (WT_STAT_ENABLED(session)) \ ++(fld); \ } while (0) #define WT_LSM_TREE_STAT_INCRV(session, fld, v) do { \ - if (WT_STAT_ENABLED(session)) \ + if (WT_STAT_ENABLED(session)) \ (fld) += (int64_t)(v); \ } while (0) int64_t bloom_false_positive; diff --git a/src/include/mutex.h b/src/include/mutex.h index 00babd47fbf..c0e25ebb295 100644 --- a/src/include/mutex.h +++ b/src/include/mutex.h @@ -66,8 +66,8 @@ struct __wt_spinlock { WT_CACHE_LINE_PAD_BEGIN #if SPINLOCK_TYPE == SPINLOCK_GCC volatile int lock; -#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \ - SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE || \ +#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ + SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE ||\ SPINLOCK_TYPE == SPINLOCK_MSVC wt_mutex_t lock; #else diff --git a/src/include/mutex.i b/src/include/mutex.i index 5b14bb24730..44b8494cdbf 100644 --- a/src/include/mutex.i +++ b/src/include/mutex.i @@ -102,8 +102,8 @@ __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) __sync_lock_release(&t->lock); } -#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \ - SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE +#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ + SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE /* * __wt_spin_init -- @@ -142,8 +142,8 @@ __wt_spin_destroy(WT_SESSION_IMPL *session, WT_SPINLOCK *t) } } -#if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \ - SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE +#if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ + SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE /* * __wt_spin_trylock -- diff --git a/src/include/schema.h b/src/include/schema.h index 8b8ee5616d1..fa836084834 100644 --- a/src/include/schema.h +++ b/src/include/schema.h @@ -323,7 +323,7 @@ struct __wt_table { F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ } \ if (__handle_write_locked) { \ - __wt_writelock(session, &__conn->dhandle_lock); \ + 
__wt_writelock(session, &__conn->dhandle_lock); \ F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ } \ } while (0) diff --git a/src/include/stat.h b/src/include/stat.h index fa62cf27693..beb589dc0ef 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -374,11 +374,9 @@ struct __wt_connection_stats { int64_t write_io; int64_t cursor_create; int64_t cursor_insert; - int64_t cursor_modify; int64_t cursor_next; int64_t cursor_prev; int64_t cursor_remove; - int64_t cursor_reserve; int64_t cursor_reset; int64_t cursor_restart; int64_t cursor_search; @@ -611,11 +609,9 @@ struct __wt_dsrc_stats { int64_t cursor_remove_bytes; int64_t cursor_update_bytes; int64_t cursor_insert; - int64_t cursor_modify; int64_t cursor_next; int64_t cursor_prev; int64_t cursor_remove; - int64_t cursor_reserve; int64_t cursor_reset; int64_t cursor_restart; int64_t cursor_search; diff --git a/src/include/txn.i b/src/include/txn.i index f7321af5b12..4b6ba17853f 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -235,7 +235,7 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd) { /* Skip reserved place-holders, they're never visible. */ for (; upd != NULL; upd = upd->next) - if (upd->type != WT_UPDATE_RESERVED && + if (!WT_UPDATE_RESERVED_ISSET(upd) && __wt_txn_visible(session, upd->txnid)) break; diff --git a/src/include/verify_build.h b/src/include/verify_build.h index e93f5931c21..d2ccf206990 100644 --- a/src/include/verify_build.h +++ b/src/include/verify_build.h @@ -52,7 +52,6 @@ __wt_verify_build(void) /* Check specific structures weren't padded. */ WT_SIZE_CHECK(WT_BLOCK_DESC, WT_BLOCK_DESC_SIZE); WT_SIZE_CHECK(WT_REF, WT_REF_SIZE); - WT_SIZE_CHECK(WT_UPDATE, WT_UPDATE_SIZE); /* Check specific structures were padded. 
*/ #define WT_PADDING_CHECK(s) \ diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 5e76b2915b1..b93fbebef25 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -49,6 +49,12 @@ extern "C" { #define WT_ATTRIBUTE_LIBRARY_VISIBLE __attribute__((visibility("default"))) #endif +#ifdef SWIG +%{ +#include <wiredtiger.h> +%} +#endif + /*! * @defgroup wt WiredTiger API * The functions, handles and methods applications use to access and manage @@ -78,7 +84,6 @@ struct __wt_extractor; typedef struct __wt_extractor WT_EXTRACTOR; struct __wt_file_handle; typedef struct __wt_file_handle WT_FILE_HANDLE; struct __wt_file_system; typedef struct __wt_file_system WT_FILE_SYSTEM; struct __wt_item; typedef struct __wt_item WT_ITEM; -struct __wt_modify; typedef struct __wt_modify WT_MODIFY; struct __wt_session; typedef struct __wt_session WT_SESSION; #if defined(SWIGJAVA) @@ -132,43 +137,6 @@ struct __wt_item { #endif }; -/*! - * A set of modifications for a value, including a pointer to new data and a - * length, plus a target offset in the value and an optional length of data - * in the value to be replaced. - * - * WT_MODIFY structures do not need to be cleared before use. - */ -struct __wt_modify { - /*! - * New data. The size of the new data may be zero when no new data is - * provided. - */ - WT_ITEM data; - - /*! - * The zero-based byte offset in the value where the new data is placed. - * - * If the offset is past the end of the value, nul bytes are appended to - * the value up to the specified offset. - */ - size_t offset; - - /*! - * The number of bytes in the value to be replaced. - * - * If the size is zero, no bytes from the value are replaced and the new - * data is inserted. - * - * If the offset is past the end of the value, the size is ignored. - * - * If the offset plus the size overlaps the end of the previous value, - * bytes from the offset to the end of the value are replaced and any - * remaining new data is appended.
- */ - size_t size; -}; - /*! * The maximum packed size of a 64-bit integer. The ::wiredtiger_struct_pack * function will pack single long integers into at most this many bytes. @@ -477,38 +445,6 @@ struct __wt_cursor { */ int __F(insert)(WT_CURSOR *cursor); - /*! - * Modify an existing record. - * - * Both the key and value must be set and the record must already exist; - * the record will be updated. - * - * Modification structures are applied in order, and later modifications - * can update earlier modifications. - * - * The modify method is only supported on raw byte arrays accessed using - * a WT_ITEM structure, that is, a format type of \c u. - * - * @snippet ex_all.c Modify an existing record - * - * On success, the cursor ends positioned at the modified record; to - * minimize cursor resources, the WT_CURSOR::reset method should be - * called as soon as the cursor no longer needs that position. - * - * The maximum length of a single column stored in a table is not fixed - * (as it partially depends on the underlying file configuration), but - * is always a small number of bytes less than 4GB. - * - * @param cursor the cursor handle - * @param entries an array of modification data structures - * @param nentries the number of modification data structures - * @errors - * In particular, if \c in_memory is configured for the database and - * the modify requires more than the configured cache size to complete, - * ::WT_CACHE_FULL is returned. - */ - int __F(modify)(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries); - /*! * Update an existing record and optionally insert a record. * @@ -538,7 +474,7 @@ struct __wt_cursor { * @errors * In particular, if \c overwrite=false is configured and no record with * the specified key exists, ::WT_NOTFOUND is returned. 
- * Also, if \c in_memory is configured for the database and the update + * Also, if \c in_memory is configured for the database and the insert * requires more than the configured cache size to complete, * ::WT_CACHE_FULL is returned. */ @@ -4678,296 +4614,292 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CURSOR_CREATE 1115 /*! cursor: cursor insert calls */ #define WT_STAT_CONN_CURSOR_INSERT 1116 -/*! cursor: cursor modify calls */ -#define WT_STAT_CONN_CURSOR_MODIFY 1117 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1118 +#define WT_STAT_CONN_CURSOR_NEXT 1117 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1119 +#define WT_STAT_CONN_CURSOR_PREV 1118 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1120 -/*! cursor: cursor reserve calls */ -#define WT_STAT_CONN_CURSOR_RESERVE 1121 +#define WT_STAT_CONN_CURSOR_REMOVE 1119 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1122 +#define WT_STAT_CONN_CURSOR_RESET 1120 /*! cursor: cursor restarted searches */ -#define WT_STAT_CONN_CURSOR_RESTART 1123 +#define WT_STAT_CONN_CURSOR_RESTART 1121 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1124 +#define WT_STAT_CONN_CURSOR_SEARCH 1122 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1125 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1123 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1126 +#define WT_STAT_CONN_CURSOR_UPDATE 1124 /*! cursor: truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1127 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1125 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1128 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1126 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1129 +#define WT_STAT_CONN_DH_SWEEP_REF 1127 /*! 
data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1130 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1128 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1131 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1129 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1132 +#define WT_STAT_CONN_DH_SWEEP_TOD 1130 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1133 +#define WT_STAT_CONN_DH_SWEEPS 1131 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1134 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1132 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1135 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1133 /*! lock: checkpoint lock acquisitions */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1136 +#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1134 /*! lock: checkpoint lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1137 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1135 /*! lock: checkpoint lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1138 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1136 /*! lock: handle-list lock eviction thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1139 +#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1137 /*! lock: metadata lock acquisitions */ -#define WT_STAT_CONN_LOCK_METADATA_COUNT 1140 +#define WT_STAT_CONN_LOCK_METADATA_COUNT 1138 /*! lock: metadata lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1141 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1139 /*! lock: metadata lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1142 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1140 /*! 
lock: schema lock acquisitions */ -#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1143 +#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1141 /*! lock: schema lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1144 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1142 /*! lock: schema lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1145 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1143 /*! lock: table lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_COUNT 1146 +#define WT_STAT_CONN_LOCK_TABLE_COUNT 1144 /*! * lock: table lock application thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1147 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1145 /*! * lock: table lock internal thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1148 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1146 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1149 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1147 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1150 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1148 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1151 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1149 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1152 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1150 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1153 +#define WT_STAT_CONN_LOG_FLUSH 1151 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1154 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1152 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1155 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1153 /*! 
log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1156 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1154 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1157 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1155 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1158 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1156 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1159 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1157 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1160 +#define WT_STAT_CONN_LOG_SCANS 1158 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1161 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1159 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1162 +#define WT_STAT_CONN_LOG_WRITE_LSN 1160 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1163 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1161 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1164 +#define WT_STAT_CONN_LOG_SYNC 1162 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1165 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1163 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1166 +#define WT_STAT_CONN_LOG_SYNC_DIR 1164 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1167 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1165 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1168 +#define WT_STAT_CONN_LOG_WRITES 1166 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1169 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1167 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1170 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1168 /*! 
log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1171 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1169 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1172 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1170 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1173 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1171 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1174 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1172 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1175 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1173 /*! log: slot close lost race */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1176 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1174 /*! log: slot close unbuffered waits */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1177 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1175 /*! log: slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1178 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1176 /*! log: slot join atomic update races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1179 +#define WT_STAT_CONN_LOG_SLOT_RACES 1177 /*! log: slot join calls atomic updates raced */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1180 +#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1178 /*! log: slot join calls did not yield */ -#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1181 +#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1179 /*! log: slot join calls found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1182 +#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1180 /*! log: slot join calls slept */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1183 +#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1181 /*! log: slot join calls yielded */ -#define WT_STAT_CONN_LOG_SLOT_YIELD 1184 +#define WT_STAT_CONN_LOG_SLOT_YIELD 1182 /*! 
log: slot join found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1185 +#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1183 /*! log: slot joins yield time (usecs) */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1186 +#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1184 /*! log: slot transitions unable to find free slot */ -#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1187 +#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1185 /*! log: slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1188 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1186 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1189 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1187 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1190 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1188 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1191 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1189 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1192 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1190 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1193 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1191 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1194 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1192 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1195 +#define WT_STAT_CONN_REC_PAGES 1193 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1196 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1194 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1197 +#define WT_STAT_CONN_REC_PAGE_DELETE 1195 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1198 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1196 /*! 
reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1199 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1197 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1200 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1198 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1201 +#define WT_STAT_CONN_SESSION_OPEN 1199 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1202 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1200 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1203 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1201 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1204 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1202 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1205 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1203 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1206 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1204 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1207 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1205 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1208 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1206 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1209 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1207 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1210 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1208 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1211 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1209 /*! 
session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1212 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1210 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1213 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1211 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1214 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1212 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1215 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1213 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1216 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1214 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1217 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1215 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1218 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1216 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1219 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1217 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1220 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1218 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1221 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1219 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1222 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1220 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1223 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1221 /*! 
thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1224 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1222 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1225 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1223 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1226 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1224 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1227 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1225 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1228 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1226 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1229 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1227 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1230 +#define WT_STAT_CONN_PAGE_SLEEP 1228 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1231 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1229 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1232 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1230 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1233 +#define WT_STAT_CONN_TXN_BEGIN 1231 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1234 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1232 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1235 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1233 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1236 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1234 /*! 
transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1237 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1235 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1238 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1236 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1239 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1237 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1240 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1238 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1241 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1239 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1242 +#define WT_STAT_CONN_TXN_CHECKPOINT 1240 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1243 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1241 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1244 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1242 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1245 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1243 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1246 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1244 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1247 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1245 /*! 
transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1248 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1246 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1249 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1247 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1250 +#define WT_STAT_CONN_TXN_SYNC 1248 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1251 +#define WT_STAT_CONN_TXN_COMMIT 1249 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1252 +#define WT_STAT_CONN_TXN_ROLLBACK 1250 /*! * @} @@ -5249,65 +5181,61 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2092 /*! cursor: insert calls */ #define WT_STAT_DSRC_CURSOR_INSERT 2093 -/*! cursor: modify calls */ -#define WT_STAT_DSRC_CURSOR_MODIFY 2094 /*! cursor: next calls */ -#define WT_STAT_DSRC_CURSOR_NEXT 2095 +#define WT_STAT_DSRC_CURSOR_NEXT 2094 /*! cursor: prev calls */ -#define WT_STAT_DSRC_CURSOR_PREV 2096 +#define WT_STAT_DSRC_CURSOR_PREV 2095 /*! cursor: remove calls */ -#define WT_STAT_DSRC_CURSOR_REMOVE 2097 -/*! cursor: reserve calls */ -#define WT_STAT_DSRC_CURSOR_RESERVE 2098 +#define WT_STAT_DSRC_CURSOR_REMOVE 2096 /*! cursor: reset calls */ -#define WT_STAT_DSRC_CURSOR_RESET 2099 +#define WT_STAT_DSRC_CURSOR_RESET 2097 /*! cursor: restarted searches */ -#define WT_STAT_DSRC_CURSOR_RESTART 2100 +#define WT_STAT_DSRC_CURSOR_RESTART 2098 /*! cursor: search calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH 2101 +#define WT_STAT_DSRC_CURSOR_SEARCH 2099 /*! cursor: search near calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2102 +#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2100 /*! cursor: truncate calls */ -#define WT_STAT_DSRC_CURSOR_TRUNCATE 2103 +#define WT_STAT_DSRC_CURSOR_TRUNCATE 2101 /*! 
cursor: update calls */ -#define WT_STAT_DSRC_CURSOR_UPDATE 2104 +#define WT_STAT_DSRC_CURSOR_UPDATE 2102 /*! reconciliation: dictionary matches */ -#define WT_STAT_DSRC_REC_DICTIONARY 2105 +#define WT_STAT_DSRC_REC_DICTIONARY 2103 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2106 +#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2104 /*! * reconciliation: internal page key bytes discarded using suffix * compression */ -#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2107 +#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2105 /*! reconciliation: internal page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2108 +#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2106 /*! reconciliation: internal-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2109 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2107 /*! reconciliation: leaf page key bytes discarded using prefix compression */ -#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2110 +#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2108 /*! reconciliation: leaf page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2111 +#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2109 /*! reconciliation: leaf-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2112 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2110 /*! reconciliation: maximum blocks required for a page */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2113 +#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2111 /*! reconciliation: overflow values written */ -#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2114 +#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2112 /*! reconciliation: page checksum matches */ -#define WT_STAT_DSRC_REC_PAGE_MATCH 2115 +#define WT_STAT_DSRC_REC_PAGE_MATCH 2113 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_DSRC_REC_PAGES 2116 +#define WT_STAT_DSRC_REC_PAGES 2114 /*! 
reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_DSRC_REC_PAGES_EVICTION 2117 +#define WT_STAT_DSRC_REC_PAGES_EVICTION 2115 /*! reconciliation: pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE 2118 +#define WT_STAT_DSRC_REC_PAGE_DELETE 2116 /*! session: object compaction */ -#define WT_STAT_DSRC_SESSION_COMPACT 2119 +#define WT_STAT_DSRC_SESSION_COMPACT 2117 /*! session: open cursor count */ -#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2120 +#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2118 /*! transaction: update conflicts */ -#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2121 +#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2119 /*! * @} diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index b14e94eb93e..e62d6cab584 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -1756,7 +1756,6 @@ __wt_clsm_open(WT_SESSION_IMPL *session, __clsm_search, /* search */ __clsm_search_near, /* search-near */ __clsm_insert, /* insert */ - __wt_cursor_modify_notsup, /* modify */ __clsm_update, /* update */ __clsm_remove, /* remove */ __clsm_reserve, /* reserve */ diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 8f7769766a9..52a279b8c96 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -1227,8 +1227,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, } /* Reconciliation should never see a reserved update. */ - WT_ASSERT(session, - *updp == NULL || (*updp)->type != WT_UPDATE_RESERVED); + WT_ASSERT(session, *updp == NULL || !WT_UPDATE_RESERVED_ISSET(*updp)); /* * If all of the updates were aborted, quit. This test is not strictly @@ -1412,14 +1411,14 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, * place a deleted record at the end of the update list. 
*/ if (vpack == NULL || vpack->type == WT_CELL_DEL) - WT_RET(__wt_update_alloc(session, - NULL, &append, &notused, WT_UPDATE_DELETED)); + WT_RET(__wt_update_alloc( + session, NULL, &append, &notused, true, false)); else { WT_RET(__wt_scr_alloc(session, 0, &tmp)); if ((ret = __wt_page_cell_data_ref( session, page, vpack, tmp)) == 0) ret = __wt_update_alloc(session, - tmp, &append, &notused, WT_UPDATE_STANDARD); + tmp, &append, &notused, false, false); __wt_scr_free(session, &tmp); WT_RET(ret); } @@ -3676,20 +3675,20 @@ __rec_update_las(WT_SESSION_IMPL *session, * restored, obviously. */ do { - if (upd->type == WT_UPDATE_RESERVED) + if (WT_UPDATE_RESERVED_ISSET(upd)) continue; cursor->set_key(cursor, btree_id, &las_addr, ++las_counter, list->onpage_txn, key); - if (upd->type == WT_UPDATE_DELETED) + if (WT_UPDATE_DELETED_ISSET(upd)) las_value.size = 0; else { las_value.data = WT_UPDATE_DATA(upd); las_value.size = upd->size; } cursor->set_value( - cursor, upd->txnid, upd->type, &las_value); + cursor, upd->txnid, upd->size, &las_value); WT_ERR(cursor->insert(cursor)); ++insert_cnt; @@ -4615,7 +4614,7 @@ record_loop: /* update_no_copy = true; /* No data copy */ repeat_count = 1; /* Single record */ - deleted = upd->type == WT_UPDATE_DELETED; + deleted = WT_UPDATE_DELETED_ISSET(upd); if (!deleted) { data = WT_UPDATE_DATA(upd); size = upd->size; @@ -4850,7 +4849,7 @@ compare: /* } } else { deleted = upd == NULL || - upd->type == WT_UPDATE_DELETED; + WT_UPDATE_DELETED_ISSET(upd); if (!deleted) { data = WT_UPDATE_DATA(upd); size = upd->size; @@ -5395,7 +5394,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session, __wt_ovfl_cache(session, page, rip, vpack)); /* If this key/value pair was deleted, we're done.
*/ - if (upd->type == WT_UPDATE_DELETED) { + if (WT_UPDATE_DELETED_ISSET(upd)) { /* * Overflow keys referencing discarded values * are no longer useful, discard the backing @@ -5605,7 +5604,7 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) for (; ins != NULL; ins = WT_SKIP_NEXT(ins)) { /* Look for an update. */ WT_RET(__rec_txn_read(session, r, ins, NULL, NULL, &upd)); - if (upd == NULL || upd->type == WT_UPDATE_DELETED) + if (upd == NULL || WT_UPDATE_DELETED_ISSET(upd)) continue; if (upd->size == 0) /* Build value cell. */ diff --git a/src/support/stat.c b/src/support/stat.c index bc40244f5e6..8711e6b9bc1 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -97,11 +97,9 @@ static const char * const __stats_dsrc_desc[] = { "cursor: cursor-remove key bytes removed", "cursor: cursor-update value bytes updated", "cursor: insert calls", - "cursor: modify calls", "cursor: next calls", "cursor: prev calls", "cursor: remove calls", - "cursor: reserve calls", "cursor: reset calls", "cursor: restarted searches", "cursor: search calls", @@ -261,11 +259,9 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats) stats->cursor_remove_bytes = 0; stats->cursor_update_bytes = 0; stats->cursor_insert = 0; - stats->cursor_modify = 0; stats->cursor_next = 0; stats->cursor_prev = 0; stats->cursor_remove = 0; - stats->cursor_reserve = 0; stats->cursor_reset = 0; stats->cursor_restart = 0; stats->cursor_search = 0; @@ -414,11 +410,9 @@ __wt_stat_dsrc_aggregate_single( to->cursor_remove_bytes += from->cursor_remove_bytes; to->cursor_update_bytes += from->cursor_update_bytes; to->cursor_insert += from->cursor_insert; - to->cursor_modify += from->cursor_modify; to->cursor_next += from->cursor_next; to->cursor_prev += from->cursor_prev; to->cursor_remove += from->cursor_remove; - to->cursor_reserve += from->cursor_reserve; to->cursor_reset += from->cursor_reset; to->cursor_restart += from->cursor_restart; to->cursor_search += from->cursor_search; @@ 
-594,11 +588,9 @@ __wt_stat_dsrc_aggregate( to->cursor_remove_bytes += WT_STAT_READ(from, cursor_remove_bytes); to->cursor_update_bytes += WT_STAT_READ(from, cursor_update_bytes); to->cursor_insert += WT_STAT_READ(from, cursor_insert); - to->cursor_modify += WT_STAT_READ(from, cursor_modify); to->cursor_next += WT_STAT_READ(from, cursor_next); to->cursor_prev += WT_STAT_READ(from, cursor_prev); to->cursor_remove += WT_STAT_READ(from, cursor_remove); - to->cursor_reserve += WT_STAT_READ(from, cursor_reserve); to->cursor_reset += WT_STAT_READ(from, cursor_reset); to->cursor_restart += WT_STAT_READ(from, cursor_restart); to->cursor_search += WT_STAT_READ(from, cursor_search); @@ -749,11 +741,9 @@ static const char * const __stats_connection_desc[] = { "connection: total write I/Os", "cursor: cursor create calls", "cursor: cursor insert calls", - "cursor: cursor modify calls", "cursor: cursor next calls", "cursor: cursor prev calls", "cursor: cursor remove calls", - "cursor: cursor reserve calls", "cursor: cursor reset calls", "cursor: cursor restarted searches", "cursor: cursor search calls", @@ -1044,11 +1034,9 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->write_io = 0; stats->cursor_create = 0; stats->cursor_insert = 0; - stats->cursor_modify = 0; stats->cursor_next = 0; stats->cursor_prev = 0; stats->cursor_remove = 0; - stats->cursor_reserve = 0; stats->cursor_reset = 0; stats->cursor_restart = 0; stats->cursor_search = 0; @@ -1359,11 +1347,9 @@ __wt_stat_connection_aggregate( to->write_io += WT_STAT_READ(from, write_io); to->cursor_create += WT_STAT_READ(from, cursor_create); to->cursor_insert += WT_STAT_READ(from, cursor_insert); - to->cursor_modify += WT_STAT_READ(from, cursor_modify); to->cursor_next += WT_STAT_READ(from, cursor_next); to->cursor_prev += WT_STAT_READ(from, cursor_prev); to->cursor_remove += WT_STAT_READ(from, cursor_remove); - to->cursor_reserve += WT_STAT_READ(from, cursor_reserve); to->cursor_reset += 
WT_STAT_READ(from, cursor_reset); to->cursor_restart += WT_STAT_READ(from, cursor_restart); to->cursor_search += WT_STAT_READ(from, cursor_search); diff --git a/src/txn/txn.c b/src/txn/txn.c index d9edbb80564..ac4be37f855 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -600,7 +600,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) * Switch reserved operations to abort to simplify * obsolete update list truncation. */ - if (op->u.upd->type == WT_UPDATE_RESERVED) + if (WT_UPDATE_RESERVED_ISSET(op->u.upd)) op->u.upd->txnid = WT_TXN_ABORTED; break; case WT_TXN_OP_REF: diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c index 74dc679a6ef..fae2027e1ec 100644 --- a/src/txn/txn_log.c +++ b/src/txn/txn_log.c @@ -82,12 +82,12 @@ __txn_op_log(WT_SESSION_IMPL *session, * or update, all of which require log records. We shouldn't ever log * reserve operations. */ - WT_ASSERT(session, upd->type != WT_UPDATE_RESERVED); + WT_ASSERT(session, !WT_UPDATE_RESERVED_ISSET(upd)); if (cbt->btree->type == BTREE_ROW) { #ifdef HAVE_DIAGNOSTIC __txn_op_log_row_key_check(session, cbt); #endif - if (upd->type == WT_UPDATE_DELETED) + if (WT_UPDATE_DELETED_ISSET(upd)) WT_RET(__wt_logop_row_remove_pack( session, logrec, op->fileid, &cursor->key)); else @@ -97,7 +97,7 @@ __txn_op_log(WT_SESSION_IMPL *session, recno = WT_INSERT_RECNO(cbt->ins); WT_ASSERT(session, recno != WT_RECNO_OOB); - if (upd->type == WT_UPDATE_DELETED) + if (WT_UPDATE_DELETED_ISSET(upd)) WT_RET(__wt_logop_col_remove_pack( session, logrec, op->fileid, recno)); else diff --git a/test/csuite/scope/main.c b/test/csuite/scope/main.c index 83d6bd479d9..8b9a79decd0 100644 --- a/test/csuite/scope/main.c +++ b/test/csuite/scope/main.c @@ -28,7 +28,7 @@ #include "test_util.h" #define KEY "key" -#define VALUE "value,value,value" +#define VALUE "value" static int ignore_errors; @@ -63,55 +63,44 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) { struct { const char *op; - enum { INSERT, MODIFY, SEARCH, SEARCH_NEAR, + 
enum { INSERT, SEARCH, SEARCH_NEAR, REMOVE, REMOVE_POS, RESERVE, UPDATE } func; const char *config; } *op, ops[] = { /* - * The ops order is specific: insert has to happen first so - * other operations are possible, and remove has to be last. + * The ops order is fixed and shouldn't change, that is, insert + * has to happen first so search, update and remove operations + * are possible, and remove has to be last. */ { "insert", INSERT, NULL, }, { "search", SEARCH, NULL, }, { "search", SEARCH_NEAR, NULL, }, { "reserve", RESERVE, NULL, }, - { "insert", MODIFY, NULL, }, { "update", UPDATE, NULL, }, { "remove", REMOVE, NULL, }, { "remove", REMOVE_POS, NULL, }, { NULL, INSERT, NULL } }; WT_CURSOR *cursor; -#define MODIFY_ENTRIES 2 - WT_MODIFY entries[MODIFY_ENTRIES]; - WT_ITEM vu; uint64_t keyr; - const char *key, *vs; + const char *key, *value; char keybuf[100], valuebuf[100]; int exact; - bool recno, vstring; + bool recno; /* Reserve requires a running transaction. */ testutil_check(session->begin_transaction(session, NULL)); cursor = NULL; for (op = ops; op->op != NULL; op++) { - key = vs = NULL; - memset(&vu, 0, sizeof(vu)); + key = value = NULL; /* Open a cursor. */ if (cursor != NULL) testutil_check(cursor->close(cursor)); testutil_check(session->open_cursor( session, uri, NULL, op->config, &cursor)); - - /* Operations change based on the key/value formats. */ recno = strcmp(cursor->key_format, "r") == 0; - vstring = strcmp(cursor->value_format, "S") == 0; - - /* Modify is only possible with "item" values. 
*/ - if (vstring && op->func == MODIFY) - continue; /* * Set up application buffers so we can detect overwrites @@ -125,12 +114,7 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) cursor->set_key(cursor, keybuf); } strcpy(valuebuf, VALUE); - if (vstring) - cursor->set_value(cursor, valuebuf); - else { - vu.size = strlen(vu.data = valuebuf); - cursor->set_value(cursor, &vu); - } + cursor->set_value(cursor, valuebuf); /* * The application must keep key and value memory valid until @@ -143,20 +127,6 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) case INSERT: testutil_check(cursor->insert(cursor)); break; - case MODIFY: - /* Modify, but don't really change anything. */ - entries[0].data.data = &VALUE[0]; - entries[0].data.size = 2; - entries[0].offset = 0; - entries[0].size = 2; - entries[1].data.data = &VALUE[3]; - entries[1].data.size = 5; - entries[1].offset = 3; - entries[1].size = 5; - - testutil_check( - cursor->modify(cursor, entries, MODIFY_ENTRIES)); - break; case SEARCH: testutil_check(cursor->search(cursor)); break; @@ -210,12 +180,7 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) else testutil_assert( cursor->get_key(cursor, &key) != 0); - if (vstring) - testutil_assert( - cursor->get_value(cursor, &vs) != 0); - else - testutil_assert( - cursor->get_value(cursor, &vu) != 0); + testutil_assert(cursor->get_value(cursor, &value) != 0); testutil_assert(ignore_errors == 0); break; case REMOVE_POS: @@ -236,22 +201,16 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) testutil_assert(strcmp(key, KEY) == 0); } ignore_errors = 1; - if (vstring) - testutil_assert( - cursor->get_value(cursor, &vs) != 0); - else - testutil_assert( - cursor->get_value(cursor, &vu) != 0); + testutil_assert(cursor->get_value(cursor, &value) != 0); testutil_assert(ignore_errors == 0); break; - case MODIFY: case RESERVE: case SEARCH: case SEARCH_NEAR: case UPDATE: /* - * Modify, reserve, search, search-near and update all - * position the cursor and have 
both a key and value. + * Reserve, search, search-near and update position the + * cursor and have both a key and value. * * Any key/value should not reference application * memory. @@ -266,19 +225,9 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) testutil_assert(key != keybuf); testutil_assert(strcmp(key, KEY) == 0); } - if (vstring) { - testutil_assert( - cursor->get_value(cursor, &vs) == 0); - testutil_assert(vs != valuebuf); - testutil_assert(strcmp(vs, VALUE) == 0); - } else { - testutil_assert( - cursor->get_value(cursor, &vu) == 0); - testutil_assert(vu.data != valuebuf); - testutil_assert(vu.size == strlen(VALUE)); - testutil_assert( - memcmp(vu.data, VALUE, strlen(VALUE)) == 0); - } + testutil_assert(cursor->get_value(cursor, &value) == 0); + testutil_assert(value != valuebuf); + testutil_assert(strcmp(value, VALUE) == 0); break; } @@ -290,16 +239,9 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) if (recno) cursor->set_key(cursor, (uint64_t)1); else { - strcpy(keybuf, KEY); - cursor->set_key(cursor, keybuf); - } - strcpy(valuebuf, VALUE); - if (vstring) - cursor->set_value(cursor, valuebuf); - else { - vu.size = strlen(vu.data = valuebuf); - cursor->set_value(cursor, &vu); + cursor->set_key(cursor, KEY); } + cursor->set_value(cursor, VALUE); testutil_check(cursor->insert(cursor)); } } @@ -330,19 +272,11 @@ main(int argc, char *argv[]) wiredtiger_open(opts->home, &event_handler, "create", &opts->conn)); run(opts->conn, "file:file.SS", "key_format=S,value_format=S"); - run(opts->conn, "file:file.Su", "key_format=S,value_format=u"); run(opts->conn, "file:file.rS", "key_format=r,value_format=S"); - run(opts->conn, "file:file.ru", "key_format=r,value_format=u"); - run(opts->conn, "lsm:lsm.SS", "key_format=S,value_format=S"); - run(opts->conn, "lsm:lsm.Su", "key_format=S,value_format=S"); run(opts->conn, "lsm:lsm.rS", "key_format=r,value_format=S"); - run(opts->conn, "lsm:lsm.ru", "key_format=r,value_format=S"); - run(opts->conn, 
"table:table.SS", "key_format=S,value_format=S"); - run(opts->conn, "table:table.Su", "key_format=S,value_format=u"); run(opts->conn, "table:table.rS", "key_format=r,value_format=S"); - run(opts->conn, "table:table.ru", "key_format=r,value_format=u"); testutil_cleanup(opts); diff --git a/test/format/config.c b/test/format/config.c index 2685438af00..ce1dc6d6e8e 100644 --- a/test/format/config.c +++ b/test/format/config.c @@ -489,8 +489,6 @@ config_pct(void) #define CONFIG_DELETE_ENTRY 0 { "delete_pct", &g.c_delete_pct, 0 }, { "insert_pct", &g.c_insert_pct, 0 }, -#define CONFIG_MODIFY_ENTRY 2 - { "modify_pct", &g.c_modify_pct, 0 }, { "read_pct", &g.c_read_pct, 0 }, { "write_pct", &g.c_write_pct, 0 }, }; @@ -510,16 +508,6 @@ config_pct(void) testutil_die(EINVAL, "operation percentages total to more than 100%%"); - /* Cursor modify isn't possible for fixed-length column store. */ - if (g.type == FIX) { - if (config_is_perm("modify_pct")) - testutil_die(EINVAL, - "WT_CURSOR.modify not supported by fixed-length " - "column store or LSM"); - list[CONFIG_MODIFY_ENTRY].order = 0; - *list[CONFIG_MODIFY_ENTRY].vp = 0; - } - /* * If the delete percentage isn't nailed down, periodically set it to * 0 so salvage gets run. 
Don't do it on the first run, all our smoke @@ -559,9 +547,8 @@ config_pct(void) list[max_slot].order = 0; pct -= *list[max_slot].vp; } - - testutil_assert(g.c_delete_pct + g.c_insert_pct + - g.c_modify_pct + g.c_read_pct + g.c_write_pct == 100); + testutil_assert(g.c_delete_pct + + g.c_insert_pct + g.c_read_pct + g.c_write_pct == 100); } /* diff --git a/test/format/config.h b/test/format/config.h index 3a41411e104..bc809a764ce 100644 --- a/test/format/config.h +++ b/test/format/config.h @@ -238,10 +238,6 @@ static CONFIG c[] = { "configure for mmap operations", /* 90% */ C_BOOL, 90, 0, 0, &g.c_mmap, NULL }, - { "modify_pct", - "percent operations that are value modifications", - C_IGNORE, 0, 0, 100, &g.c_modify_pct, NULL }, - { "ops", "the number of modification operations done per run", 0x0, 0, M(2), M(100), &g.c_ops, NULL }, @@ -327,7 +323,7 @@ static CONFIG c[] = { C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_config_open }, { "write_pct", - "percent operations that are value updates", + "percent operations that are writes", C_IGNORE, 0, 0, 100, &g.c_write_pct, NULL }, { NULL, NULL, 0x0, 0, 0, 0, NULL, NULL } diff --git a/test/format/format.h b/test/format/format.h index 104ee1553f4..fa898e439be 100644 --- a/test/format/format.h +++ b/test/format/format.h @@ -78,8 +78,6 @@ #define FORMAT_OPERATION_REPS 3 /* 3 thread operations sets */ -#define MAX_MODIFY_ENTRIES 5 /* maximum change vectors */ - typedef struct { char *home; /* Home directory */ char *home_backup; /* Hot-backup directory */ @@ -149,28 +147,28 @@ typedef struct { uint32_t c_bloom_hash_count; uint32_t c_bloom_oldest; uint32_t c_cache; + uint32_t c_compact; uint32_t c_checkpoints; - char *c_checksum; + char *c_checksum; uint32_t c_chunk_size; - uint32_t c_compact; - char *c_compression; - char *c_config_open; + char *c_compression; + char *c_encryption; + char *c_config_open; uint32_t c_data_extend; - char *c_data_source; + char *c_data_source; uint32_t c_delete_pct; uint32_t c_dictionary; uint32_t 
c_direct_io; - char *c_encryption; uint32_t c_evict_max; - char *c_file_type; uint32_t c_firstfit; + char *c_file_type; uint32_t c_huffman_key; uint32_t c_huffman_value; uint32_t c_in_memory; uint32_t c_insert_pct; uint32_t c_internal_key_truncation; uint32_t c_intl_page_max; - char *c_isolation; + char *c_isolation; uint32_t c_key_gap; uint32_t c_key_max; uint32_t c_key_min; @@ -178,23 +176,22 @@ typedef struct { uint32_t c_leak_memory; uint32_t c_logging; uint32_t c_logging_archive; - char *c_logging_compression; + char *c_logging_compression; uint32_t c_logging_prealloc; uint32_t c_long_running_txn; uint32_t c_lsm_worker_threads; uint32_t c_merge_max; uint32_t c_mmap; - uint32_t c_modify_pct; uint32_t c_ops; + uint32_t c_quiet; uint32_t c_prefix_compression; uint32_t c_prefix_compression_min; - uint32_t c_quiet; - uint32_t c_read_pct; - uint32_t c_rebalance; uint32_t c_repeat_data_pct; uint32_t c_reverse; uint32_t c_rows; uint32_t c_runs; + uint32_t c_read_pct; + uint32_t c_rebalance; uint32_t c_salvage; uint32_t c_split_pct; uint32_t c_statistics; diff --git a/test/format/ops.c b/test/format/ops.c index 02cce77eec2..6e3e3b783c5 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -29,8 +29,6 @@ #include "format.h" static int col_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); -static int col_modify( - TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool); static int col_reserve(WT_CURSOR *, uint64_t, bool); static int col_update( @@ -39,8 +37,6 @@ static int nextprev(WT_CURSOR *, int); static void *ops(void *); static int row_insert( TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); -static int row_modify( - TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool); static int row_reserve(WT_CURSOR *, WT_ITEM *, uint64_t, bool); static int row_update( @@ -407,7 +403,7 @@ snap_check(WT_CURSOR 
*cursor, static void * ops(void *arg) { - enum { INSERT, MODIFY, READ, REMOVE, UPDATE } op; + enum { INSERT, READ, REMOVE, UPDATE } op; SNAP_OPS *snap, snap_list[64]; TINFO *tinfo; WT_CONNECTION *conn; @@ -614,12 +610,11 @@ skip_checkpoint: /* Pick the next checkpoint operation. */ op = REMOVE; else if (i < g.c_delete_pct + g.c_insert_pct) op = INSERT; - else if (i < g.c_delete_pct + - g.c_insert_pct + g.c_modify_pct) - op = MODIFY; - else if (i < g.c_delete_pct + - g.c_insert_pct + g.c_modify_pct + g.c_write_pct) + else if (i < + g.c_delete_pct + g.c_insert_pct + g.c_write_pct) op = UPDATE; + else + op = READ; } /* @@ -703,30 +698,6 @@ skip_checkpoint: /* Pick the next checkpoint operation. */ testutil_assert(ret == 0 || ret == WT_ROLLBACK); } break; - case MODIFY: - ++tinfo->update; - switch (g.type) { - case ROW: - ret = row_modify(tinfo, cursor, - key, value, keyno, positioned); - break; - case VAR: - ret = col_modify(tinfo, cursor, - key, value, keyno, positioned); - break; - } - if (ret == 0) { - positioned = true; - if (SNAP_TRACK) - snap_track(snap++, keyno, NULL, value); - } else { - positioned = false; - if (ret == WT_ROLLBACK && intxn) - goto deadlock; - testutil_assert(ret == 0 || - ret == WT_NOTFOUND || ret == WT_ROLLBACK); - } - break; case READ: ++tinfo->search; ret = read_row(cursor, key, value, keyno); @@ -771,15 +742,17 @@ skip_checkpoint: /* Pick the next checkpoint operation. */ case UPDATE: update_instead_of_insert: ++tinfo->update; + + /* Update the row. 
*/ switch (g.type) { case ROW: - ret = row_update(tinfo, cursor, - key, value, keyno, positioned); + ret = row_update(tinfo, + cursor, key, value, keyno, positioned); break; case FIX: case VAR: - ret = col_update(tinfo, cursor, - key, value, keyno, positioned); + ret = col_update(tinfo, + cursor, key, value, keyno, positioned); break; } if (ret == 0) { @@ -1195,235 +1168,6 @@ col_reserve(WT_CURSOR *cursor, uint64_t keyno, bool positioned) return (0); } -/* - * modify_build -- - * Generate a set of modify vectors, and copy what the final result - * should be into the value buffer. - */ -static bool -modify_build(TINFO *tinfo, - WT_CURSOR *cursor, WT_MODIFY *entries, int *nentriesp, WT_ITEM *value) -{ - static char repl[64]; - size_t len, size; - u_int i, nentries; - WT_ITEM *ta, _ta, *tb, _tb, *tmp; - - if (repl[0] == '\0') - memset(repl, '+', sizeof(repl)); - - ta = &_ta; - memset(ta, 0, sizeof(*ta)); - tb = &_tb; - memset(tb, 0, sizeof(*tb)); - - testutil_check(cursor->get_value(cursor, value)); - - /* - * Randomly select a number of byte changes, offsets and lengths. Start - * at least 11 bytes in so we skip the leading key information. - */ - nentries = mmrand(&tinfo->rnd, 1, MAX_MODIFY_ENTRIES); - for (i = 0; i < nentries; ++i) { - entries[i].data.data = repl; - entries[i].data.size = (size_t)mmrand(&tinfo->rnd, 0, 10); - entries[i].offset = (size_t)mmrand(&tinfo->rnd, 20, 40); - entries[i].size = (size_t)mmrand(&tinfo->rnd, 0, 10); - } - - /* - * Process the entries to figure out how large a buffer we need. This is - * a bit pessimistic because we're ignoring replacement bytes, but it's - * a simpler calculation. - */ - for (size = cursor->value.size, i = 0; i < nentries; ++i) { - if (entries[i].offset >= size) - size = entries[i].offset; - size += entries[i].data.size; - } - - /* If size is larger than the available buffer size, skip this one. */ - if (size >= value->memsize) - return (false); - - /* Allocate a pair of buffers. 
*/ - ta->mem = dcalloc(size, sizeof(uint8_t)); - tb->mem = dcalloc(size, sizeof(uint8_t)); - - /* - * Use a brute-force process to create the value WiredTiger will create - * from this change vector. Don't do anything tricky to speed it up, we - * want to use a different algorithm from WiredTiger's, the idea is to - * bug-check the library. - */ - memcpy(ta->mem, value->data, value->size); - ta->size = value->size; - for (i = 0; i < nentries; ++i) { - /* Take leading bytes from the original, plus any gap bytes. */ - if (entries[i].offset >= ta->size) { - memcpy(tb->mem, ta->mem, ta->size); - if (entries[i].offset > ta->size) - memset((uint8_t *)tb->mem + ta->size, - '\0', entries[i].offset - ta->size); - } else - if (entries[i].offset > 0) - memcpy(tb->mem, ta->mem, entries[i].offset); - tb->size = entries[i].offset; - - /* Take replacement bytes. */ - if (entries[i].data.size > 0) { - memcpy((uint8_t *)tb->mem + tb->size, - entries[i].data.data, entries[i].data.size); - tb->size += entries[i].data.size; - } - - /* Take trailing bytes from the original. */ - len = entries[i].offset + entries[i].size; - if (ta->size > len) { - memcpy((uint8_t *)tb->mem + tb->size, - (uint8_t *)ta->mem + len, ta->size - len); - tb->size += ta->size - len; - } - testutil_assert(tb->size <= size); - - tmp = ta; - ta = tb; - tb = tmp; - } - - /* Copy the expected result into the value structure. */ - memcpy(value->mem, ta->mem, ta->size); - value->data = value->mem; - value->size = ta->size; - - free(ta->mem); - free(tb->mem); - - *nentriesp = (int)nentries; - return (true); -} - -/* - * row_modify -- - * Modify a row in a row-store file. 
- */ -static int -row_modify(TINFO *tinfo, WT_CURSOR *cursor, - WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) -{ - WT_DECL_RET; - WT_MODIFY entries[MAX_MODIFY_ENTRIES]; - int nentries; - - if (!positioned) { - key_gen(key, keyno); - cursor->set_key(cursor, key); - switch (ret = cursor->search(cursor)) { - case 0: - break; - case WT_CACHE_FULL: - case WT_ROLLBACK: - return (WT_ROLLBACK); - case WT_NOTFOUND: - return (WT_NOTFOUND); - default: - testutil_die(ret, - "row_modify: read row %" PRIu64 " by key", keyno); - } - } - - /* - * Generate a set of change vectors and copy the expected result into - * the value buffer. If the return value is non-zero, there wasn't a - * big enough value to work with, or for some reason we couldn't build - * a reasonable change vector. - */ - ret = WT_NOTFOUND; - if (modify_build(tinfo, cursor, entries, &nentries, value)) - ret = cursor->modify(cursor, entries, nentries); - switch (ret) { - case 0: - break; - case WT_CACHE_FULL: - case WT_ROLLBACK: - return (WT_ROLLBACK); - case WT_NOTFOUND: - return (WT_NOTFOUND); - default: - testutil_die(ret, - "row_modify: modify row %" PRIu64 " by key", keyno); - } - -#ifdef HAVE_BERKELEY_DB - if (!SINGLETHREADED) - return (0); - - bdb_update(key->data, key->size, value->data, value->size); -#endif - return (0); -} - -/* - * col_modify -- - * Modify a row in a column-store file. - */ -static int -col_modify(TINFO *tinfo, WT_CURSOR *cursor, - WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) -{ - WT_DECL_RET; - WT_MODIFY entries[MAX_MODIFY_ENTRIES]; - int nentries; - - if (!positioned) { - cursor->set_key(cursor, keyno); - switch (ret = cursor->search(cursor)) { - case 0: - break; - case WT_CACHE_FULL: - case WT_ROLLBACK: - return (WT_ROLLBACK); - case WT_NOTFOUND: - return (WT_NOTFOUND); - default: - testutil_die(ret, - "col_modify: read row %" PRIu64, keyno); - } - } - - /* - * Generate a set of change vectors and copy the expected result into - * the value buffer. 
If the return value is non-zero, there wasn't a - * big enough value to work with, or for some reason we couldn't build - * a reasonable change vector. - */ - ret = WT_NOTFOUND; - if (modify_build(tinfo, cursor, entries, &nentries, value)) - ret = cursor->modify(cursor, entries, nentries); - switch (ret) { - case 0: - break; - case WT_CACHE_FULL: - case WT_ROLLBACK: - return (WT_ROLLBACK); - case WT_NOTFOUND: - return (WT_NOTFOUND); - default: - testutil_die(ret, "col_modify: modify row %" PRIu64, keyno); - } - -#ifdef HAVE_BERKELEY_DB - if (!SINGLETHREADED) - return (0); - - key_gen(key, keyno); - bdb_update(key->data, key->size, value->data, value->size); -#else - (void)key; /* [-Wunused-variable] */ -#endif - return (0); -} - /* * row_update -- * Update a row in a row-store file. diff --git a/test/suite/test_cursor12.py b/test/suite/test_cursor12.py deleted file mode 100644 index 827f37cfcef..00000000000 --- a/test/suite/test_cursor12.py +++ /dev/null @@ -1,165 +0,0 @@ -#!/usr/bin/env python -# -# Public Domain 2014-2017 MongoDB, Inc. -# Public Domain 2008-2014 WiredTiger, Inc. -# -# This is free and unencumbered software released into the public domain. -# -# Anyone is free to copy, modify, publish, use, compile, sell, or -# distribute this software, either in source code form or as a compiled -# binary, for any purpose, commercial or non-commercial, and by any -# means. -# -# In jurisdictions that recognize copyright laws, the author or authors -# of this software dedicate any and all copyright interest in the -# software to the public domain. We make this dedication for the benefit -# of the public at large and to the detriment of our heirs and -# successors. We intend this dedication to be an overt act of -# relinquishment in perpetuity of all present and future rights to this -# software under copyright law. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. - -import wiredtiger, wttest -from wtscenario import make_scenarios - -# test_cursor12.py -# Test cursor modify call -class test_cursor12(wttest.WiredTigerTestCase): - types = [ - ('file', dict(uri='file:modify')), - ('lsm', dict(uri='lsm:modify')), - ('table', dict(uri='table:modify')), - ] - scenarios = make_scenarios(types) - - # Smoke-test the modify API. - def test_modify_smoke(self): - # List with original value, final value, and modifications to get - # there. - list = [ - { - 'o' : 'ABCDEFGH', # no operation - 'f' : 'ABCDEFGH', - 'mods' : [['', 0, 0]] - },{ - 'o' : 'ABCDEFGH', # no operation with offset - 'f' : 'ABCDEFGH', - 'mods' : [['', 4, 0]] - },{ - 'o' : 'ABCDEFGH', # rewrite beginning - 'f' : '--CDEFGH', - 'mods' : [['--', 0, 2]] - },{ - 'o' : 'ABCDEFGH', # rewrite end - 'f' : 'ABCDEF--', - 'mods' : [['--', 6, 2]] - },{ - 'o' : 'ABCDEFGH', # append - 'f' : 'ABCDEFGH--', - 'mods' : [['--', 8, 2]] - },{ - 'o' : 'ABCDEFGH', # append with gap - 'f' : 'ABCDEFGH\00\00--', - 'mods' : [['--', 10, 2]] - },{ - 'o' : 'ABCDEFGH', # multiple replacements - 'f' : 'A-C-E-G-', - 'mods' : [['-', 1, 1], ['-', 3, 1], ['-', 5, 1], ['-', 7, 1]] - },{ - 'o' : 'ABCDEFGH', # multiple overlapping replacements - 'f' : 'A-CDEFGH', - 'mods' : [['+', 1, 1], ['+', 1, 1], ['+', 1, 1], ['-', 1, 1]] - },{ - 'o' : 'ABCDEFGH', # multiple overlapping gap replacements - 'f' : 'ABCDEFGH\00\00--', - 'mods' : [['+', 10, 1], ['+', 10, 1], ['+', 10, 1], ['--', 10, 2]] - },{ - 'o' : 'ABCDEFGH', # shrink beginning - 'f' : '--EFGH', - 
'mods' : [['--', 0, 4]] - },{ - 'o' : 'ABCDEFGH', # shrink middle - 'f' : 'AB--GH', - 'mods' : [['--', 2, 4]] - },{ - 'o' : 'ABCDEFGH', # shrink end - 'f' : 'ABCD--', - 'mods' : [['--', 4, 4]] - },{ - 'o' : 'ABCDEFGH', # grow beginning - 'f' : '--ABCDEFGH', - 'mods' : [['--', 0, 0]] - },{ - 'o' : 'ABCDEFGH', # grow middle - 'f' : 'ABCD--EFGH', - 'mods' : [['--', 4, 0]] - },{ - 'o' : 'ABCDEFGH', # grow end - 'f' : 'ABCDEFGH--', - 'mods' : [['--', 8, 0]] - },{ - 'o' : 'ABCDEFGH', # discard beginning - 'f' : 'EFGH', - 'mods' : [['', 0, 4]] - },{ - 'o' : 'ABCDEFGH', # discard middle - 'f' : 'ABGH', - 'mods' : [['', 2, 4]] - },{ - 'o' : 'ABCDEFGH', # discard end - 'f' : 'ABCD', - 'mods' : [['', 4, 4]] - },{ - 'o' : 'ABCDEFGH', # overlap the end and append - 'f' : 'ABCDEF--XX', - 'mods' : [['--XX', 6, 2]] - },{ - 'o' : 'ABCDEFGH', # overlap the end with incorrect size - 'f' : 'ABCDEFG01234567', - 'mods' : [['01234567', 7, 2000]] - } - ] - - self.session.create(self.uri, 'key_format=S,value_format=u') - cursor = self.session.open_cursor(self.uri, None, None) - - # For each test in the list, set the original value, apply modifications - # in order, then confirm the final state. - for i in list: - cursor['ABC'] = i['o'] - - mods = [] - for j in i['mods']: - mod = wiredtiger.Modify(j[0], j[1], j[2]) - mods.append(mod) - - cursor.set_key('ABC') - cursor.modify(mods) - self.assertEquals(str(cursor['ABC']), i['f']) - - # Check that modify returns not-found after a delete. 
- def test_modify_delete(self): - self.session.create(self.uri, 'key_format=S,value_format=u') - cursor = self.session.open_cursor(self.uri, None, None) - cursor['ABC'] = 'ABCDEFGH' - cursor.set_key('ABC') - cursor.remove() - - mods = [] - mod = wiredtiger.Modify('ABCD', 3, 3) - mods.append(mod) - - cursor.set_key('ABC') - #self.assertEqual(cursor.modify(mods), wiredtiger.WT_NOTFOUND) - self.assertRaises( - wiredtiger.WiredTigerError, lambda:cursor.modify(mods)) - -if __name__ == '__main__': - wttest.run() -- cgit v1.2.1 From f7ac27044ef7a0332c68be16d51ad25077d4f8b2 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 24 May 2017 17:26:12 -0400 Subject: WT-3339 The CURSOR_UPDATE_API_CALL macro will dump core on a NULL btree handle (#3436) This is a problem for LSM or any non-standard data source that calls CURSOR_UPDATE_API_CALL with an in-memory configuration and a NULL btree handle. Split CURSOR_UPDATE_API_CALL into two versions, one of which expects a btree handle, one of which doesn't. Rename the argument to TXN_API_CALL_NOCONF from "bt" to "dh", it's a data handle in that macro, not a btree handle. 
--- src/cursor/cur_ds.c | 4 ++-- src/cursor/cur_file.c | 8 ++++---- src/cursor/cur_table.c | 6 +++--- src/include/api.h | 18 +++++++++++------- src/lsm/lsm_cursor.c | 6 +++--- 5 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/cursor/cur_ds.c b/src/cursor/cur_ds.c index e40cb30dd53..bb7a7a9994a 100644 --- a/src/cursor/cur_ds.c +++ b/src/cursor/cur_ds.c @@ -317,7 +317,7 @@ __curds_insert(WT_CURSOR *cursor) source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source; - CURSOR_UPDATE_API_CALL(cursor, session, insert, NULL); + CURSOR_UPDATE_API_CALL(cursor, session, insert); __curds_txn_enter(session); @@ -350,7 +350,7 @@ __curds_update(WT_CURSOR *cursor) source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source; - CURSOR_UPDATE_API_CALL(cursor, session, update, NULL); + CURSOR_UPDATE_API_CALL(cursor, session, update); WT_STAT_CONN_INCR(session, cursor_update); WT_STAT_DATA_INCR(session, cursor_update); diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index c43826799cf..f4d42802032 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -231,7 +231,7 @@ __curfile_insert(WT_CURSOR *cursor) WT_SESSION_IMPL *session; cbt = (WT_CURSOR_BTREE *)cursor; - CURSOR_UPDATE_API_CALL(cursor, session, insert, cbt->btree); + CURSOR_UPDATE_API_CALL_BTREE(cursor, session, insert, cbt->btree); if (!F_ISSET(cursor, WT_CURSTD_APPEND)) WT_ERR(__cursor_checkkey(cursor)); @@ -265,7 +265,7 @@ __wt_curfile_insert_check(WT_CURSOR *cursor) WT_SESSION_IMPL *session; cbt = (WT_CURSOR_BTREE *)cursor; - CURSOR_UPDATE_API_CALL(cursor, session, update, cbt->btree); + CURSOR_UPDATE_API_CALL_BTREE(cursor, session, update, cbt->btree); WT_ERR(__cursor_checkkey(cursor)); ret = __wt_btcur_insert_check(cbt); @@ -286,7 +286,7 @@ __curfile_update(WT_CURSOR *cursor) WT_SESSION_IMPL *session; cbt = (WT_CURSOR_BTREE *)cursor; - CURSOR_UPDATE_API_CALL(cursor, session, update, cbt->btree); + CURSOR_UPDATE_API_CALL_BTREE(cursor, session, update, cbt->btree); WT_ERR(__cursor_checkkey(cursor)); 
WT_ERR(__cursor_checkvalue(cursor)); @@ -345,7 +345,7 @@ __curfile_reserve(WT_CURSOR *cursor) WT_SESSION_IMPL *session; cbt = (WT_CURSOR_BTREE *)cursor; - CURSOR_UPDATE_API_CALL(cursor, session, reserve, cbt->btree); + CURSOR_UPDATE_API_CALL_BTREE(cursor, session, reserve, cbt->btree); WT_ERR(__cursor_checkkey(cursor)); WT_ERR(__wt_txn_context_check(session, true)); diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c index c6514aaac58..89c98986c0f 100644 --- a/src/cursor/cur_table.c +++ b/src/cursor/cur_table.c @@ -484,7 +484,7 @@ __curtable_insert(WT_CURSOR *cursor) u_int i; ctable = (WT_CURSOR_TABLE *)cursor; - JOINABLE_CURSOR_UPDATE_API_CALL(cursor, session, insert, NULL); + JOINABLE_CURSOR_UPDATE_API_CALL(cursor, session, insert); WT_ERR(__curtable_open_indices(ctable)); /* @@ -563,7 +563,7 @@ __curtable_update(WT_CURSOR *cursor) WT_SESSION_IMPL *session; ctable = (WT_CURSOR_TABLE *)cursor; - JOINABLE_CURSOR_UPDATE_API_CALL(cursor, session, update, NULL); + JOINABLE_CURSOR_UPDATE_API_CALL(cursor, session, update); WT_ERR(__curtable_open_indices(ctable)); /* @@ -673,7 +673,7 @@ __curtable_reserve(WT_CURSOR *cursor) WT_SESSION_IMPL *session; ctable = (WT_CURSOR_TABLE *)cursor; - JOINABLE_CURSOR_UPDATE_API_CALL(cursor, session, update, NULL); + JOINABLE_CURSOR_UPDATE_API_CALL(cursor, session, update); /* * We don't have to open the indices here, but it makes the code similar diff --git a/src/include/api.h b/src/include/api.h index fb0c41fe1c8..372ba063cd3 100644 --- a/src/include/api.h +++ b/src/include/api.h @@ -47,9 +47,9 @@ F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT) /* An API call wrapped in a transaction if necessary. 
*/ -#define TXN_API_CALL_NOCONF(s, h, n, bt) do { \ +#define TXN_API_CALL_NOCONF(s, h, n, dh) do { \ bool __autotxn = false; \ - API_CALL_NOCONF(s, h, n, bt); \ + API_CALL_NOCONF(s, h, n, dh); \ __autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\ if (__autotxn) \ F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT) @@ -133,17 +133,21 @@ CURSOR_REMOVE_API_CALL(cur, s, bt); \ JOINABLE_CURSOR_CALL_CHECK(cur) -#define CURSOR_UPDATE_API_CALL(cur, s, n, bt) \ +#define CURSOR_UPDATE_API_CALL_BTREE(cur, s, n, bt) \ (s) = (WT_SESSION_IMPL *)(cur)->session; \ - TXN_API_CALL_NOCONF(s, WT_CURSOR, n, \ - ((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); \ + TXN_API_CALL_NOCONF( \ + s, WT_CURSOR, n, ((WT_BTREE *)(bt))->dhandle); \ if (F_ISSET(S2C(s), WT_CONN_IN_MEMORY) && \ !F_ISSET((WT_BTREE *)(bt), WT_BTREE_IGNORE_CACHE) && \ __wt_cache_full(s)) \ WT_ERR(WT_CACHE_FULL); -#define JOINABLE_CURSOR_UPDATE_API_CALL(cur, s, n, bt) \ - CURSOR_UPDATE_API_CALL(cur, s, n, bt); \ +#define CURSOR_UPDATE_API_CALL(cur, s, n) \ + (s) = (WT_SESSION_IMPL *)(cur)->session; \ + TXN_API_CALL_NOCONF(s, WT_CURSOR, n, NULL); + +#define JOINABLE_CURSOR_UPDATE_API_CALL(cur, s, n) \ + CURSOR_UPDATE_API_CALL(cur, s, n); \ JOINABLE_CURSOR_CALL_CHECK(cur) #define CURSOR_UPDATE_API_END(s, ret) \ diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index e62d6cab584..90750a27ab3 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -1525,7 +1525,7 @@ __clsm_insert(WT_CURSOR *cursor) clsm = (WT_CURSOR_LSM *)cursor; - CURSOR_UPDATE_API_CALL(cursor, session, insert, NULL); + CURSOR_UPDATE_API_CALL(cursor, session, insert); WT_ERR(__cursor_needkey(cursor)); WT_ERR(__cursor_needvalue(cursor)); WT_ERR(__clsm_enter(clsm, false, true)); @@ -1574,7 +1574,7 @@ __clsm_update(WT_CURSOR *cursor) clsm = (WT_CURSOR_LSM *)cursor; - CURSOR_UPDATE_API_CALL(cursor, session, update, NULL); + CURSOR_UPDATE_API_CALL(cursor, session, update); WT_ERR(__cursor_needkey(cursor)); WT_ERR(__cursor_needvalue(cursor)); 
WT_ERR(__clsm_enter(clsm, false, true)); @@ -1674,7 +1674,7 @@ __clsm_reserve(WT_CURSOR *cursor) clsm = (WT_CURSOR_LSM *)cursor; - CURSOR_UPDATE_API_CALL(cursor, session, reserve, NULL); + CURSOR_UPDATE_API_CALL(cursor, session, reserve); WT_ERR(__cursor_needkey(cursor)); __cursor_novalue(cursor); WT_ERR(__wt_txn_context_check(session, true)); -- cgit v1.2.1 From 4641a4586fd18925b3e91881b7c5fd7a203c337b Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 25 May 2017 10:50:53 -0400 Subject: WT-2972 Add interface allowing partial updates to existing values (#3435) --- dist/s_define.list | 2 +- dist/s_string.ok | 3 + dist/stat_data.py | 6 +- examples/c/ex_all.c | 33 ++ .../java/com/wiredtiger/examples/ex_cursor.java | 41 +++ lang/java/Makefile.am | 2 + lang/java/java_doc.i | 1 + lang/java/wiredtiger.i | 233 ++++++++++++ lang/python/wiredtiger.i | 119 +++++- src/btree/bt_curnext.c | 8 +- src/btree/bt_curprev.c | 8 +- src/btree/bt_cursor.c | 87 +++-- src/btree/bt_debug.c | 4 +- src/btree/bt_delete.c | 2 +- src/btree/bt_read.c | 16 +- src/btree/bt_split.c | 6 +- src/btree/bt_stat.c | 20 +- src/btree/col_modify.c | 17 +- src/btree/row_modify.c | 20 +- src/cursor/cur_backup.c | 1 + src/cursor/cur_config.c | 1 + src/cursor/cur_ds.c | 1 + src/cursor/cur_dump.c | 1 + src/cursor/cur_file.c | 6 + src/cursor/cur_index.c | 1 + src/cursor/cur_join.c | 2 + src/cursor/cur_log.c | 1 + src/cursor/cur_metadata.c | 1 + src/cursor/cur_stat.c | 1 + src/cursor/cur_std.c | 115 ++++++ src/cursor/cur_table.c | 2 + src/docs/Doxyfile | 1 + src/include/btmem.h | 37 +- src/include/cursor.h | 2 + src/include/extern.h | 7 +- src/include/log.h | 2 +- src/include/lsm.h | 4 +- src/include/mutex.h | 4 +- src/include/mutex.i | 8 +- src/include/schema.h | 2 +- src/include/stat.h | 4 + src/include/txn.i | 2 +- src/include/verify_build.h | 1 + src/include/wiredtiger.in | 406 ++++++++++++--------- src/lsm/lsm_cursor.c | 1 + src/reconcile/rec_write.c | 23 +- src/support/stat.c | 14 + src/txn/txn.c | 2 +- 
src/txn/txn_log.c | 6 +- test/csuite/scope/main.c | 102 +++++- test/format/config.c | 17 +- test/format/config.h | 6 +- test/format/format.h | 27 +- test/format/ops.c | 278 +++++++++++++- test/suite/test_cursor12.py | 165 +++++++++ 55 files changed, 1527 insertions(+), 355 deletions(-) create mode 100644 test/suite/test_cursor12.py diff --git a/dist/s_define.list b/dist/s_define.list index b7f124ef18c..9f94132f584 100644 --- a/dist/s_define.list +++ b/dist/s_define.list @@ -58,7 +58,7 @@ WT_STAT_INCRV_BASE WT_STAT_WRITE WT_TIMEDIFF_US WT_TRET_ERROR_OK -WT_UPDATE_RESERVED_VALUE +WT_UPDATE_SIZE WT_WITH_LOCK_NOWAIT WT_WITH_LOCK_WAIT __F diff --git a/dist/s_string.ok b/dist/s_string.ok index ac21c61a8ef..d5a562fcbd1 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -344,6 +344,7 @@ Split's Stoica StoreLoad StoreStore +Su Syscall TAILQ TCMalloc @@ -949,6 +950,7 @@ nd needkey needvalue negint +nentries newbar newfile newuri @@ -1092,6 +1094,7 @@ rotN rotn rp rpc +ru run's runtime rwlock diff --git a/dist/stat_data.py b/dist/stat_data.py index 4a147ca44eb..203a88fb055 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -231,8 +231,8 @@ connection_stats = [ CacheStat('cache_eviction_walks_abandoned', 'eviction walks abandoned'), CacheStat('cache_eviction_walks_active', 'files with active eviction walks', 'no_clear,no_scale'), CacheStat('cache_eviction_walks_started', 'files with new eviction walks started'), - CacheStat('cache_eviction_worker_evicting', 'eviction worker thread evicting pages'), CacheStat('cache_eviction_worker_created', 'eviction worker thread created'), + CacheStat('cache_eviction_worker_evicting', 'eviction worker thread evicting pages'), CacheStat('cache_eviction_worker_removed', 'eviction worker thread removed'), CacheStat('cache_hazard_checks', 'hazard pointer check calls'), CacheStat('cache_hazard_max', 'hazard pointer maximum array length', 'max_aggregate,no_scale'), @@ -262,9 +262,11 @@ connection_stats = [ 
########################################## CursorStat('cursor_create', 'cursor create calls'), CursorStat('cursor_insert', 'cursor insert calls'), + CursorStat('cursor_modify', 'cursor modify calls'), CursorStat('cursor_next', 'cursor next calls'), CursorStat('cursor_prev', 'cursor prev calls'), CursorStat('cursor_remove', 'cursor remove calls'), + CursorStat('cursor_reserve', 'cursor reserve calls'), CursorStat('cursor_reset', 'cursor reset calls'), CursorStat('cursor_restart', 'cursor restarted searches'), CursorStat('cursor_search', 'cursor search calls'), @@ -553,10 +555,12 @@ dsrc_stats = [ CursorStat('cursor_insert', 'insert calls'), CursorStat('cursor_insert_bulk', 'bulk-loaded cursor-insert calls'), CursorStat('cursor_insert_bytes', 'cursor-insert key and value bytes inserted', 'size'), + CursorStat('cursor_modify', 'modify calls'), CursorStat('cursor_next', 'next calls'), CursorStat('cursor_prev', 'prev calls'), CursorStat('cursor_remove', 'remove calls'), CursorStat('cursor_remove_bytes', 'cursor-remove key bytes removed', 'size'), + CursorStat('cursor_reserve', 'reserve calls'), CursorStat('cursor_reset', 'reset calls'), CursorStat('cursor_restart', 'restarted searches'), CursorStat('cursor_search', 'search calls'), diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c index f94863584e8..5e1fa4bbcc5 100644 --- a/examples/c/ex_all.c +++ b/examples/c/ex_all.c @@ -308,6 +308,39 @@ cursor_ops(WT_SESSION *session) /*! [Reserve a record] */ } + { + /*! [Modify an existing record] */ + WT_MODIFY entries[3]; + const char *key = "some key"; + ret = session->open_cursor( + session, "table:mytable", NULL, NULL, &cursor); + + /* Position the cursor. */ + cursor->set_key(cursor, key); + ret = cursor->search(cursor); + + /* Replace 20 bytes starting at byte offset 5. */ + entries[0].data.data = "some data"; + entries[0].data.size = strlen(entries[0].data.data); + entries[0].offset = 5; + entries[0].size = 20; + + /* Insert data at byte offset 40. 
*/ + entries[1].data.data = "and more data"; + entries[1].data.size = strlen(entries[1].data.data); + entries[1].offset = 40; + entries[1].size = 0; + + /* Replace 2 bytes starting at byte offset 10. */ + entries[2].data.data = "and more data"; + entries[2].data.size = strlen(entries[2].data.data); + entries[2].offset = 10; + entries[2].size = 2; + + ret = cursor->modify(cursor, entries, 3); + /*! [Modify an existing record] */ + } + { /*! [Update an existing record or insert a new record] */ const char *key = "some key", *value = "some value"; diff --git a/examples/java/com/wiredtiger/examples/ex_cursor.java b/examples/java/com/wiredtiger/examples/ex_cursor.java index 498ace12865..4a57f3c35da 100644 --- a/examples/java/com/wiredtiger/examples/ex_cursor.java +++ b/examples/java/com/wiredtiger/examples/ex_cursor.java @@ -156,6 +156,41 @@ public class ex_cursor { } /*! [cursor remove] */ + /*! [cursor modify] */ + public static int + cursor_modify(Cursor cursor) + throws WiredTigerException + { + byte orig[] = new byte[4]; + for (int i = 0; i < 4; i++) + orig[i] = (byte)i; + cursor.putKeyString("key"); + cursor.putValueByteArray(orig); + cursor.insert(); // 0x0 0x1 0x2 0x3 + + byte b10[] = new byte[4]; + for (int i = 0; i < 4; i++) + b10[i] = (byte)(0x10 + i); + byte b20[] = new byte[4]; + for (int i = 0; i < 4; i++) + b20[i] = (byte)(0x20 + i); + + Modify modlist[] = new Modify[2]; + // The following Modify replaces one byte at position one by: + // (0x10 0x11 0x12 0x13), leaving: + // 0x0 0x10 0x11 0x12 0x13 0x2 0x3 + modlist[0] = new Modify(b10, 1, 1); + + // The following Modify replaces one byte at position three by: + // (0x20 0x21 0x22 0x23), leaving: + // 0x0 0x10 0x11 0x20 0x21 0x22 0x23 0x13 0x2 0x3 + modlist[1] = new Modify(b20, 3, 1); + + cursor.putKeyString("key"); + return (cursor.modify(modlist)); + } + /*! 
[cursor modify] */ + public static int cursorExample() throws WiredTigerException @@ -219,6 +254,12 @@ public class ex_cursor { ret = cursor_remove(cursor); ret = cursor.close(); + /* Create a table with a raw value to illustrate certain operations. */ + ret = session.create("table:raw", "key_format=S,value_format=u"); + cursor = session.open_cursor("table:raw", null, null); + ret = cursor_modify(cursor); + ret = cursor.close(); + /* Note: closing the connection implicitly closes open session(s). */ if ((ret = conn.close(null)) != 0) System.err.println("Error connecting to " + home + ": " + diff --git a/lang/java/Makefile.am b/lang/java/Makefile.am index 2ff822a5d08..71515c430fd 100644 --- a/lang/java/Makefile.am +++ b/lang/java/Makefile.am @@ -18,6 +18,7 @@ JAVA_SRC = \ $(JAVADESTFULL)/AsyncOpType.java \ $(JAVADESTFULL)/Connection.java \ $(JAVADESTFULL)/Cursor.java \ + $(JAVADESTFULL)/Modify.java \ $(JAVADESTFULL)/SearchStatus.java \ $(JAVADESTFULL)/PackFormatInputStream.java \ $(JAVADESTFULL)/PackInputStream.java \ @@ -31,6 +32,7 @@ JAVA_SRC = \ $(JAVADESTFULL)/wiredtiger.java \ $(JAVADESTFULL)/wiredtigerConstants.java \ $(JAVADESTFULL)/wiredtigerJNI.java \ + $(JAVADESTFULL)/WT_MODIFY_LIST.java \ $(JAVAEXAMPLES)/ex_access.java \ $(JAVAEXAMPLES)/ex_all.java \ $(JAVAEXAMPLES)/ex_async.java \ diff --git a/lang/java/java_doc.i b/lang/java/java_doc.i index 8088abbf065..f9e017ee43a 100644 --- a/lang/java/java_doc.i +++ b/lang/java/java_doc.i @@ -12,6 +12,7 @@ COPYDOC(__wt_cursor, WT_CURSOR, reset) COPYDOC(__wt_cursor, WT_CURSOR, search) COPYDOC(__wt_cursor, WT_CURSOR, search_near) COPYDOC(__wt_cursor, WT_CURSOR, insert) +COPYDOC(__wt_cursor, WT_CURSOR, modify) COPYDOC(__wt_cursor, WT_CURSOR, update) COPYDOC(__wt_cursor, WT_CURSOR, remove) COPYDOC(__wt_cursor, WT_CURSOR, reserve) diff --git a/lang/java/wiredtiger.i b/lang/java/wiredtiger.i index d6fc5fc8b9a..4c22a0af43b 100644 --- a/lang/java/wiredtiger.i +++ b/lang/java/wiredtiger.i @@ -47,6 +47,7 @@ %} %{ +#include 
"wiredtiger.h" #include "src/include/wt_internal.h" /* @@ -108,6 +109,23 @@ static void throwWiredTigerException(JNIEnv *jenv, int err) { (*jenv)->ThrowNew(jenv, excep, wiredtiger_strerror(err)); } +struct __wt_java_modify_impl; +struct __wt_java_modify_list; +typedef struct __wt_java_modify_impl WT_MODIFY_IMPL; +typedef struct __wt_java_modify_list WT_MODIFY_LIST; +static void modify_impl_release(WT_MODIFY_IMPL *impl); +static void modify_list_release(WT_MODIFY_LIST *impl); + +/* + * An extension to the WT_MODIFY struct, so we can associate some Java-specific + * information with it. + */ +typedef struct __wt_java_modify_impl { + WT_MODIFY modify; + JNIEnv *jnienv; + jobject ref; +} WT_MODIFY_IMPL; + %} /* No finalizers */ @@ -159,6 +177,32 @@ static void throwWiredTigerException(JNIEnv *jenv, int err) { } %} +/* + * In some cases, for an internal interface, we need something like a WT_ITEM, + * but we need to hold onto the memory past the method call, and release it + * later. A WT_ITEM_HOLD serves the purpose, it retains the java object + * for the byte array that we make into a global reference. + */ +%typemap(jni) WT_ITEM_HOLD, WT_ITEM_HOLD * "jbyteArray" +%typemap(jtype) WT_ITEM_HOLD, WT_ITEM_HOLD * "byte[]" +%typemap(jstype) WT_ITEM_HOLD, WT_ITEM_HOLD * "byte[]" + +%typemap(javain) WT_ITEM_HOLD, WT_ITEM_HOLD * "$javainput" +%typemap(javaout) WT_ITEM_HOLD, WT_ITEM_HOLD * { + return ($jnicall); +} +%typemap(in) WT_ITEM_HOLD * (WT_ITEM_HOLD item) %{ + $1 = &item; + $1->data = (*jenv)->GetByteArrayElements(jenv, $input, 0); + $1->size = (size_t)(*jenv)->GetArrayLength(jenv, $input); + $1->jnienv = jenv; + $1->ref = (*jenv)->NewGlobalRef(jenv, $input); +%} + +%typemap(argout) WT_ITEM_HOLD * %{ + /* Explicitly don't release the byte array elements here. */ +%} + /* Don't require empty config strings. 
*/ %typemap(default) const char *config %{ $1 = NULL; %} @@ -309,6 +353,10 @@ WT_CLASS(struct __wt_async_op, WT_ASYNC_OP, op) %rename (prev_wrap) __wt_cursor::prev; %javamethodmodifiers __wt_cursor::key_format "protected"; %javamethodmodifiers __wt_cursor::value_format "protected"; +%ignore __wt_modify::data; +%ignore __wt_modify::position; +%ignore __wt_modify::size; +%ignore __wt_cursor::modify; %ignore __wt_cursor::compare(WT_CURSOR *, WT_CURSOR *, int *); %rename (compare_wrap) __wt_cursor::compare; @@ -1224,6 +1272,47 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; JCALL1(DeleteLocalRef, jcb->jnienv, jcursor); return (0); } + + int modify_wrap(WT_MODIFY_LIST *list, WT_ITEM *k) { + int ret; + + $self->set_key($self, k); + ret = $self->modify(self, list->mod_array, list->count); + modify_list_release(list); + return (ret); + } + + /* + * Called internally after a new call. The artificial constructor for + * WT_MODIFY_LIST has no opportunity to throw an exception on a memory + * allocation failure, so the the null check must be made within a + * method on WT_CURSOR. + */ + bool _new_check_modify_list(WT_MODIFY_LIST *list) { + JAVA_CALLBACK *jcb; + if (list == NULL) { + jcb = (JAVA_CALLBACK *)$self->lang_private; + throwWiredTigerException(jcb->jnienv, ENOMEM); + return (false); + } + return (true); + } + + /* + * Called internally after a new call. The artificial constructor for + * WT_MODIFY has no opportunity to throw an exception on a memory + * allocation failure, so the the null check must be made within a + * method on WT_CURSOR. 
+ */ + bool _new_check_modify(WT_MODIFY *mod) { + JAVA_CALLBACK *jcb; + if (mod == NULL) { + jcb = (JAVA_CALLBACK *)$self->lang_private; + throwWiredTigerException(jcb->jnienv, ENOMEM); + return (false); + } + return (true); + } } /* Cache key/value formats in Cursor */ @@ -1820,6 +1909,149 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler}; return new PackInputStream(valueFormat, get_value_wrap(), _java_raw()); } + + /** + * Modify an existing record. + * + * The cursor must already be positioned, and the key's value will be + * updated. + * + * \param mods an array of modifications. + * \return 0 on success, errno on error. + */ + public int modify(Modify mods[]) + throws WiredTigerException { + byte[] key = keyPacker.getValue(); + keyPacker.reset(); + + WT_MODIFY_LIST l = new WT_MODIFY_LIST(mods.length); + if (!_new_check_modify_list(l)) + return (0); // exception is already thrown + int pos = 0; + + for (Modify m : mods) { + if (!_new_check_modify(m)) + return (0); // exception is already thrown + l.set(pos, m); + pos++; + } + return modify_wrap(l, key); + } +%} + +/* + * Support for WT_CURSOR.modify. + */ + +%inline %{ +typedef struct __wt_java_item_hold { +#ifndef SWIG + void *data; + size_t size; + JNIEnv *jnienv; + jobject ref; +#endif +} WT_ITEM_HOLD; + +/* + * An internal Java class encapsulates a list of Modify objects (stored as a + * WT_MODIFY array in C). 
+ */ +typedef struct __wt_java_modify_list { +#ifndef SWIG + WT_MODIFY *mod_array; + jobject *ref_array; + JNIEnv *jnienv; + int count; +#endif +} WT_MODIFY_LIST; +%} +%extend __wt_java_modify_list { + __wt_java_modify_list(int count) { + WT_MODIFY_LIST *self; + if (__wt_calloc_def(NULL, 1, &self) != 0) + return (NULL); + if (__wt_calloc_def(NULL, (size_t)count, + &self->mod_array) != 0) { + __wt_free(NULL, self); + return (NULL); + } + if (__wt_calloc_def(NULL, (size_t)count, + &self->ref_array) != 0) { + __wt_free(NULL, self->mod_array); + __wt_free(NULL, self); + return (NULL); + } + self->count = count; + return (self); + } + ~__wt_java_modify_list() { + modify_list_release(self); + __wt_free(NULL, self); + } + void set(int i, WT_MODIFY *m) { + WT_MODIFY_IMPL *impl = (WT_MODIFY_IMPL *)m; + self->mod_array[i] = *m; + self->ref_array[i] = impl->ref; + impl->ref = (jobject)0; + self->jnienv = impl->jnienv; + } +}; + +%extend __wt_modify { + __wt_modify() { + WT_MODIFY_IMPL *self; + if (__wt_calloc_def(NULL, 1, &self) != 0) + return (NULL); + self->modify.data.data = NULL; + self->modify.data.size = 0; + self->modify.offset = 0; + self->modify.size = 0; + return (&self->modify); + } + __wt_modify(WT_ITEM_HOLD *itemdata, + size_t offset, size_t size) { + WT_MODIFY_IMPL *self; + if (__wt_calloc_def(NULL, 1, &self) != 0) + return (NULL); + self->modify.data.data = itemdata->data; + self->modify.data.size = itemdata->size; + self->modify.offset = offset; + self->modify.size = size; + self->ref = itemdata->ref; + self->jnienv = itemdata->jnienv; + return (&self->modify); + } + ~__wt_modify() { + modify_impl_release((WT_MODIFY_IMPL *)self); + __wt_free(NULL, self); + } +}; + +%{ +static void modify_list_release(WT_MODIFY_LIST *list) { + for (int i = 0; i < list->count; i++) + if (list->ref_array[i] != (jobject)0) { + (*list->jnienv)->ReleaseByteArrayElements( + list->jnienv, list->ref_array[i], + (jbyte *)list->mod_array[i].data.data, 0); + 
(*list->jnienv)->DeleteGlobalRef( + list->jnienv, list->ref_array[i]); + } + __wt_free(NULL, list->ref_array); + __wt_free(NULL, list->mod_array); + list->count = 0; +} + +static void modify_impl_release(WT_MODIFY_IMPL *impl) { + if (impl->ref != (jobject)0) { + (*impl->jnienv)->ReleaseByteArrayElements( + impl->jnienv, impl->ref, + (jbyte *)impl->modify.data.data, 0); + (*impl->jnienv)->DeleteGlobalRef(impl->jnienv, impl->ref); + impl->ref = (jobject)0; + } +} %} /* Put a WiredTigerException on all wrapped methods. We'd like this @@ -1902,6 +2134,7 @@ REQUIRE_WRAP(WT_ASYNC_OP::get_id, __wt_async_op::get_id,getId) %rename(AsyncOp) __wt_async_op; %rename(Cursor) __wt_cursor; +%rename(Modify) __wt_modify; %rename(Session) __wt_session; %rename(Connection) __wt_connection; diff --git a/lang/python/wiredtiger.i b/lang/python/wiredtiger.i index 7a297312bb8..61c7fc62c43 100644 --- a/lang/python/wiredtiger.i +++ b/lang/python/wiredtiger.i @@ -151,6 +151,74 @@ from packing import pack, unpack } } +%typemap(in) WT_MODIFY * (int len, WT_MODIFY *modarray, int i) { + len = PyList_Size($input); + /* + * We allocate an extra cleared WT_MODIFY struct, the first + * entry will be used solely to transmit the array length to + * the call site. 
+ */ + if (__wt_calloc_def(NULL, (size_t)len + 1, &modarray) != 0) + SWIG_exception_fail(SWIG_MemoryError, "WT calloc failed"); + modarray[0].size = (size_t)len; + for (i = 1; i <= len; i++) { + PyObject *dataobj, *modobj, *offsetobj, *sizeobj; + char *datadata; + long offset, size; + Py_ssize_t datasize; + + if ((modobj = PySequence_GetItem($input, i - 1)) == NULL) + SWIG_exception_fail(SWIG_IndexError, + "Modify sequence failed"); + + WT_GETATTR(dataobj, modobj, "data"); + if (PyString_AsStringAndSize(dataobj, &datadata, + &datasize) < 0) { + Py_DECREF(dataobj); + Py_DECREF(modobj); + SWIG_exception_fail(SWIG_AttributeError, + "Modify.data bad value"); + } + modarray[i].data.data = malloc(datasize); + memcpy(modarray[i].data.data, datadata, datasize); + modarray[i].data.size = datasize; + Py_DECREF(dataobj); + + WT_GETATTR(offsetobj, modobj, "offset"); + if ((offset = PyInt_AsLong(offsetobj)) < 0) { + Py_DECREF(offsetobj); + Py_DECREF(modobj); + SWIG_exception_fail(SWIG_RuntimeError, + "Modify.offset bad value"); + } + modarray[i].offset = offset; + Py_DECREF(offsetobj); + + WT_GETATTR(sizeobj, modobj, "size"); + if ((size = PyInt_AsLong(sizeobj)) < 0) { + Py_DECREF(sizeobj); + Py_DECREF(modobj); + SWIG_exception_fail(SWIG_RuntimeError, + "Modify.size bad value"); + } + modarray[i].size = size; + Py_DECREF(sizeobj); + Py_DECREF(modobj); + } + $1 = modarray; +} + +%typemap(freearg) WT_MODIFY * { + /* The WT_MODIFY arg is in position 2. Is there a better way? */ + WT_MODIFY *modarray = modarray2; + size_t i, len; + + len = modarray[0].size; + for (i = 1; i <= len; i++) + __wt_free(NULL, modarray[i].data.data); + __wt_free(NULL, modarray); +} + /* 64 bit typemaps. 
*/ %typemap(in) uint64_t { $1 = PyLong_AsUnsignedLongLong($input); @@ -244,6 +312,13 @@ static PyObject *wtError; static int sessionFreeHandler(WT_SESSION *session_arg); static int cursorFreeHandler(WT_CURSOR *cursor_arg); + +#define WT_GETATTR(var, parent, name) \ + do if ((var = PyObject_GetAttrString(parent, name)) == NULL) { \ + Py_DECREF(parent); \ + SWIG_exception_fail(SWIG_AttributeError, \ + "Modify." #name " get failed"); \ + } while(0) %} %init %{ @@ -373,8 +448,8 @@ retry: } %enddef -/* Any API that returns an enum type uses this. */ -%define ENUM_OK(m) +/* An API that returns a value that shouldn't be checked uses this. */ +%define ANY_OK(m) %exception m { $action } @@ -408,12 +483,14 @@ retry: %enddef EBUSY_OK(__wt_connection::async_new_op) -ENUM_OK(__wt_async_op::get_type) +ANY_OK(__wt_async_op::get_type) NOTFOUND_OK(__wt_cursor::next) NOTFOUND_OK(__wt_cursor::prev) NOTFOUND_OK(__wt_cursor::remove) NOTFOUND_OK(__wt_cursor::search) NOTFOUND_OK(__wt_cursor::update) +ANY_OK(__wt_modify::__wt_modify) +ANY_OK(__wt_modify::~__wt_modify) COMPARE_OK(__wt_cursor::_compare) COMPARE_OK(__wt_cursor::_equals) @@ -448,6 +525,11 @@ COMPARE_NOTFOUND_OK(__wt_cursor::_search_near) %ignore __wt_cursor::get_value; %ignore __wt_cursor::set_key; %ignore __wt_cursor::set_value; +%ignore __wt_cursor::modify(WT_CURSOR *, WT_MODIFY *, int); +%rename (modify) __wt_cursor::_modify; +%ignore __wt_modify::data; +%ignore __wt_modify::offset; +%ignore __wt_modify::size; /* Next, override methods that return integers via arguments. */ %ignore __wt_cursor::compare(WT_CURSOR *, WT_CURSOR *, int *); @@ -772,6 +854,15 @@ typedef int int_void; return (cursorFreeHandler($self)); } + /* + * modify: the size of the array was put into the first element by the + * typemap. 
+ */ + int _modify(WT_MODIFY *list) { + int count = (int)list[0].size; + return (self->modify(self, &list[1], count)); + } + %pythoncode %{ def get_key(self): '''get_key(self) -> object @@ -870,6 +961,21 @@ typedef int int_void; %} }; +/* + * Support for WT_CURSOR.modify. The WT_MODIFY object is known to + * SWIG, but its attributes are regular Python attributes. + * We extract the attributes at the call site to WT_CURSOR.modify + * so we don't have to deal with managing Python objects references. + */ +%extend __wt_modify { +%pythoncode %{ + def __init__(self, data = '', offset = 0, size = 0): + self.data = data + self.offset = offset + self.size = size +%} +}; + %extend __wt_session { int _log_printf(const char *msg) { return self->log_printf(self, "%s", msg); @@ -951,6 +1057,7 @@ OVERRIDE_METHOD(__wt_session, WT_SESSION, log_printf, (self, msg)) %rename(AsyncOp) __wt_async_op; %rename(Cursor) __wt_cursor; +%rename(Modify) __wt_modify; %rename(Session) __wt_session; %rename(Connection) __wt_connection; @@ -974,7 +1081,7 @@ writeToPythonStream(const char *streamname, const char *message) written = NULL; arglist = arglist2 = NULL; msglen = strlen(message); - msg = malloc(msglen + 2); + WT_RET(__wt_malloc(NULL, msglen + 2, &msg)); strcpy(msg, message); strcpy(&msg[msglen], "\n"); @@ -1010,8 +1117,7 @@ err: Py_XDECREF(arglist2); /* Release python Global Interpreter Lock */ SWIG_PYTHON_THREAD_END_BLOCK; - if (msg) - free(msg); + __wt_free(NULL, msg); return (ret); } @@ -1232,4 +1338,3 @@ _rename_with_prefix('WT_STAT_CONN_', stat.conn) _rename_with_prefix('WT_STAT_DSRC_', stat.dsrc) del _rename_with_prefix %} - diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c index 091b9345713..7b92a58991d 100644 --- a/src/btree/bt_curnext.c +++ b/src/btree/bt_curnext.c @@ -142,7 +142,7 @@ new_page: if (cbt->ins == NULL) __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins)); if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL) continue; - if 
(WT_UPDATE_DELETED_ISSET(upd)) { + if (upd->type == WT_UPDATE_DELETED) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -205,7 +205,7 @@ new_page: /* Find the matching WT_COL slot. */ upd = cbt->ins == NULL ? NULL : __wt_txn_read(session, cbt->ins->upd); if (upd != NULL) { - if (WT_UPDATE_DELETED_ISSET(upd)) { + if (upd->type == WT_UPDATE_DELETED) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -325,7 +325,7 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage) new_insert: if ((ins = cbt->ins) != NULL) { if ((upd = __wt_txn_read(session, ins->upd)) == NULL) continue; - if (WT_UPDATE_DELETED_ISSET(upd)) { + if (upd->type == WT_UPDATE_DELETED) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -358,7 +358,7 @@ new_insert: if ((ins = cbt->ins) != NULL) { cbt->slot = cbt->row_iteration_slot / 2 - 1; rip = &page->pg_row[cbt->slot]; upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip)); - if (upd != NULL && WT_UPDATE_DELETED_ISSET(upd)) { + if (upd != NULL && upd->type == WT_UPDATE_DELETED) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c index 4f0fa77d3e6..55b5095fe91 100644 --- a/src/btree/bt_curprev.c +++ b/src/btree/bt_curprev.c @@ -288,7 +288,7 @@ new_page: if (cbt->ins == NULL) __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins)); if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL) continue; - if (WT_UPDATE_DELETED_ISSET(upd)) { + if (upd->type == WT_UPDATE_DELETED) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -352,7 +352,7 @@ new_page: if (cbt->recno < cbt->ref->ref_recno) upd = cbt->ins == NULL ? 
NULL : __wt_txn_read(session, cbt->ins->upd); if (upd != NULL) { - if (WT_UPDATE_DELETED_ISSET(upd)) { + if (upd->type == WT_UPDATE_DELETED) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -482,7 +482,7 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage) new_insert: if ((ins = cbt->ins) != NULL) { if ((upd = __wt_txn_read(session, ins->upd)) == NULL) continue; - if (WT_UPDATE_DELETED_ISSET(upd)) { + if (upd->type == WT_UPDATE_DELETED) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; @@ -517,7 +517,7 @@ new_insert: if ((ins = cbt->ins) != NULL) { cbt->slot = cbt->row_iteration_slot / 2 - 1; rip = &page->pg_row[cbt->slot]; upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip)); - if (upd != NULL && WT_UPDATE_DELETED_ISSET(upd)) { + if (upd != NULL && upd->type == WT_UPDATE_DELETED) { if (__wt_txn_visible_all(session, upd->txnid)) ++cbt->page_deleted_count; continue; diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 664545ee3a0..7e415150cc5 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -224,7 +224,7 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) */ if (cbt->ins != NULL && (upd = __wt_txn_read(session, cbt->ins->upd)) != NULL) { - if (WT_UPDATE_DELETED_ISSET(upd)) + if (upd->type == WT_UPDATE_DELETED) return (false); if (updp != NULL) *updp = upd; @@ -297,7 +297,7 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) page->modify->mod_row_update != NULL && (upd = __wt_txn_read(session, page->modify->mod_row_update[cbt->slot])) != NULL) { - if (WT_UPDATE_DELETED_ISSET(upd)) + if (upd->type == WT_UPDATE_DELETED) return (false); if (updp != NULL) *updp = upd; @@ -342,11 +342,11 @@ __cursor_row_search( * Column-store delete, insert, and update from an application cursor. 
*/ static inline int -__cursor_col_modify(WT_SESSION_IMPL *session, - WT_CURSOR_BTREE *cbt, bool is_remove, bool is_reserve) +__cursor_col_modify( + WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type) { return (__wt_col_modify(session, cbt, - cbt->iface.recno, &cbt->iface.value, NULL, is_remove, is_reserve)); + cbt->iface.recno, &cbt->iface.value, NULL, modify_type)); } /* @@ -354,11 +354,11 @@ __cursor_col_modify(WT_SESSION_IMPL *session, * Row-store insert, update and delete from an application cursor. */ static inline int -__cursor_row_modify(WT_SESSION_IMPL *session, - WT_CURSOR_BTREE *cbt, bool is_remove, bool is_reserve) +__cursor_row_modify( + WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type) { return (__wt_row_modify(session, cbt, - &cbt->iface.key, &cbt->iface.value, NULL, is_remove, is_reserve)); + &cbt->iface.key, &cbt->iface.value, NULL, modify_type)); } /* @@ -662,8 +662,8 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) */ cbt->compare = 0; ret = btree->type == BTREE_ROW ? 
- __cursor_row_modify(session, cbt, false, false) : - __cursor_col_modify(session, cbt, false, false); + __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD) : + __cursor_col_modify(session, cbt, WT_UPDATE_STANDARD); if (ret == 0) goto done; @@ -700,7 +700,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) WT_ERR(WT_DUPLICATE_KEY); - ret = __cursor_row_modify(session, cbt, false, false); + ret = __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD); } else { /* * Optionally insert a new record (ignoring the application's @@ -723,7 +723,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); (cbt->compare != 0 && __cursor_fix_implicit(btree, cbt)))) WT_ERR(WT_DUPLICATE_KEY); - WT_ERR(__cursor_col_modify(session, cbt, false, false)); + WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD)); if (append_key) cbt->iface.recno = cbt->recno; @@ -881,8 +881,8 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt) */ cbt->compare = 0; ret = btree->type == BTREE_ROW ? - __cursor_row_modify(session, cbt, true, false) : - __cursor_col_modify(session, cbt, true, false); + __cursor_row_modify(session, cbt, WT_UPDATE_DELETED) : + __cursor_col_modify(session, cbt, WT_UPDATE_DELETED); if (ret == 0) goto done; @@ -921,7 +921,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) WT_ERR(WT_NOTFOUND); - ret = __cursor_row_modify(session, cbt, true, false); + ret = __cursor_row_modify(session, cbt, WT_UPDATE_DELETED); } else { WT_ERR(__cursor_col_search(session, cbt, NULL)); @@ -948,7 +948,8 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); */ cbt->recno = cursor->recno; } else - ret = __cursor_col_modify(session, cbt, true, false); + ret = __cursor_col_modify( + session, cbt, WT_UPDATE_DELETED); } err: if (ret == WT_RESTART) { @@ -986,7 +987,7 @@ done: /* * Update a record in the tree. 
*/ static int -__btcur_update(WT_CURSOR_BTREE *cbt, bool is_reserve) +__btcur_update(WT_CURSOR_BTREE *cbt, u_int modify_type) { WT_BTREE *btree; WT_CURFILE_STATE state; @@ -998,15 +999,6 @@ __btcur_update(WT_CURSOR_BTREE *cbt, bool is_reserve) cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; - WT_STAT_CONN_INCR(session, cursor_update); - WT_STAT_DATA_INCR(session, cursor_update); - WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->value.size); - - if (btree->type == BTREE_ROW) - WT_RET(__cursor_size_chk(session, &cursor->key)); - if (!is_reserve) - WT_RET(__cursor_size_chk(session, &cursor->value)); - /* It's no longer possible to bulk-load into the tree. */ __cursor_disable_bulk(session, btree); @@ -1030,8 +1022,8 @@ __btcur_update(WT_CURSOR_BTREE *cbt, bool is_reserve) */ cbt->compare = 0; ret = btree->type == BTREE_ROW ? - __cursor_row_modify(session, cbt, false, is_reserve) : - __cursor_col_modify(session, cbt, false, is_reserve); + __cursor_row_modify(session, cbt, modify_type) : + __cursor_col_modify(session, cbt, modify_type); if (ret == 0) goto done; @@ -1069,7 +1061,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) WT_ERR(WT_NOTFOUND); } - ret = __cursor_row_modify(session, cbt, false, is_reserve); + ret = __cursor_row_modify(session, cbt, modify_type); } else { WT_ERR(__cursor_col_search(session, cbt, NULL)); @@ -1088,7 +1080,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); !__cursor_fix_implicit(btree, cbt)) WT_ERR(WT_NOTFOUND); } - ret = __cursor_col_modify(session, cbt, false, is_reserve); + ret = __cursor_col_modify(session, cbt, modify_type); } err: if (ret == WT_RESTART) { @@ -1106,7 +1098,7 @@ err: if (ret == WT_RESTART) { * pointer to the modify function's allocated update structure. 
*/ done: if (ret == 0) { - if (is_reserve) { + if (modify_type == WT_UPDATE_RESERVED) { F_CLR(cursor, WT_CURSTD_VALUE_SET); WT_TRET(__wt_key_return(session, cbt)); } else @@ -1131,14 +1123,19 @@ __wt_btcur_reserve(WT_CURSOR_BTREE *cbt) { WT_CURSOR *cursor; WT_DECL_RET; + WT_SESSION_IMPL *session; bool overwrite; cursor = &cbt->iface; + session = (WT_SESSION_IMPL *)cursor->session; + + WT_STAT_CONN_INCR(session, cursor_reserve); + WT_STAT_DATA_INCR(session, cursor_reserve); /* WT_CURSOR.reserve is update-without-overwrite and a special value. */ overwrite = F_ISSET(cursor, WT_CURSTD_OVERWRITE); F_CLR(cursor, WT_CURSTD_OVERWRITE); - ret = __btcur_update(cbt, true); + ret = __btcur_update(cbt, WT_UPDATE_RESERVED); if (overwrite) F_SET(cursor, WT_CURSTD_OVERWRITE); return (ret); @@ -1151,7 +1148,23 @@ __wt_btcur_reserve(WT_CURSOR_BTREE *cbt) int __wt_btcur_update(WT_CURSOR_BTREE *cbt) { - return (__btcur_update(cbt, false)); + WT_BTREE *btree; + WT_CURSOR *cursor; + WT_SESSION_IMPL *session; + + btree = cbt->btree; + cursor = &cbt->iface; + session = (WT_SESSION_IMPL *)cursor->session; + + WT_STAT_CONN_INCR(session, cursor_update); + WT_STAT_DATA_INCR(session, cursor_update); + WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->value.size); + + if (btree->type == BTREE_ROW) + WT_RET(__cursor_size_chk(session, &cursor->key)); + WT_RET(__cursor_size_chk(session, &cursor->value)); + + return (__btcur_update(cbt, WT_UPDATE_STANDARD)); } /* @@ -1274,7 +1287,7 @@ __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp) static int __cursor_truncate(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, - int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, bool, bool)) + int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int)) { WT_DECL_RET; @@ -1302,7 +1315,7 @@ retry: WT_RET(__wt_btcur_search(start)); F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); for (;;) { - if ((ret = rmfunc(session, start, true, 
false)) != 0) + if ((ret = rmfunc(session, start, WT_UPDATE_DELETED)) != 0) break; if (stop != NULL && __cursor_equals(start, stop)) @@ -1329,7 +1342,7 @@ retry: WT_RET(__wt_btcur_search(start)); static int __cursor_truncate_fix(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, - int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, bool, bool)) + int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int)) { WT_DECL_RET; const uint8_t *value; @@ -1360,7 +1373,7 @@ retry: WT_RET(__wt_btcur_search(start)); for (;;) { value = (const uint8_t *)start->iface.value.data; if (*value != 0 && - (ret = rmfunc(session, start, true, false)) != 0) + (ret = rmfunc(session, start, WT_UPDATE_DELETED)) != 0) break; if (stop != NULL && __cursor_equals(start, stop)) diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index 538c363a864..c3f98a98ec5 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -985,9 +985,9 @@ static int __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte) { for (; upd != NULL; upd = upd->next) - if (WT_UPDATE_DELETED_ISSET(upd)) + if (upd->type == WT_UPDATE_DELETED) WT_RET(ds->f(ds, "\tvalue {deleted}\n")); - else if (WT_UPDATE_RESERVED_ISSET(upd)) + else if (upd->type == WT_UPDATE_RESERVED) WT_RET(ds->f(ds, "\tvalue {reserved}\n")); else if (hexbyte) { WT_RET(ds->f(ds, "\t{")); diff --git a/src/btree/bt_delete.c b/src/btree/bt_delete.c index 12c3b044fda..4a88b672d47 100644 --- a/src/btree/bt_delete.c +++ b/src/btree/bt_delete.c @@ -333,7 +333,7 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) */ for (i = 0, size = 0; i < page->entries; ++i) { WT_ERR(__wt_calloc_one(session, &upd)); - WT_UPDATE_DELETED_SET(upd); + upd->type = WT_UPDATE_DELETED; if (page_del == NULL) upd->txnid = WT_TXN_NONE; /* Globally visible */ diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index 72a69e8591c..e6a0f53ab40 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -90,7 +90,8 @@ 
__col_instantiate(WT_SESSION_IMPL *session, { /* Search the page and add updates. */ WT_RET(__wt_col_search(session, recno, ref, cbt)); - WT_RET(__wt_col_modify(session, cbt, recno, NULL, upd, false, false)); + WT_RET(__wt_col_modify( + session, cbt, recno, NULL, upd, WT_UPDATE_STANDARD)); return (0); } @@ -104,7 +105,8 @@ __row_instantiate(WT_SESSION_IMPL *session, { /* Search the page and add updates. */ WT_RET(__wt_row_search(session, key, ref, cbt, true)); - WT_RET(__wt_row_modify(session, cbt, key, NULL, upd, false, false)); + WT_RET(__wt_row_modify( + session, cbt, key, NULL, upd, WT_UPDATE_STANDARD)); return (0); } @@ -127,7 +129,8 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_UPDATE *first_upd, *last_upd, *upd; size_t incr, total_incr; uint64_t current_recno, las_counter, las_txnid, recno, upd_txnid; - uint32_t las_id, upd_size, session_flags; + uint32_t las_id, session_flags; + uint8_t upd_type; int exact; const uint8_t *p; @@ -188,9 +191,10 @@ __las_page_instantiate(WT_SESSION_IMPL *session, /* Allocate the WT_UPDATE structure. */ WT_ERR(cursor->get_value( - cursor, &upd_txnid, &upd_size, las_value)); - WT_ERR(__wt_update_alloc(session, las_value, - &upd, &incr, upd_size == WT_UPDATE_DELETED_VALUE, false)); + cursor, &upd_txnid, &upd_type, las_value)); + WT_ERR(__wt_update_alloc(session, las_value, &upd, &incr, + upd_type == WT_UPDATE_DELETED ? + WT_UPDATE_DELETED : WT_UPDATE_STANDARD)); total_incr += incr; upd->txnid = upd_txnid; diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 23210a556da..c2c56a18131 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -1424,8 +1424,8 @@ __split_multi_inmem( WT_ERR(__wt_col_search(session, recno, ref, &cbt)); /* Apply the modification. */ - WT_ERR(__wt_col_modify( - session, &cbt, recno, NULL, upd, false, false)); + WT_ERR(__wt_col_modify(session, + &cbt, recno, NULL, upd, WT_UPDATE_STANDARD)); break; case WT_PAGE_ROW_LEAF: /* Build a key. 
*/ @@ -1447,7 +1447,7 @@ __split_multi_inmem( /* Apply the modification. */ WT_ERR(__wt_row_modify( - session, &cbt, key, NULL, upd, false, false)); + session, &cbt, key, NULL, upd, WT_UPDATE_STANDARD)); break; WT_ILLEGAL_VALUE_ERR(session); } diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c index 2b9c9bef8a2..e3b9bbced48 100644 --- a/src/btree/bt_stat.c +++ b/src/btree/bt_stat.c @@ -178,9 +178,9 @@ __stat_page_col_var( */ WT_SKIP_FOREACH(ins, WT_COL_UPDATE(page, cip)) { upd = ins->upd; - if (WT_UPDATE_RESERVED_ISSET(upd)) + if (upd->type == WT_UPDATE_RESERVED) continue; - if (WT_UPDATE_DELETED_ISSET(upd)) { + if (upd->type == WT_UPDATE_DELETED) { if (!orig_deleted) { ++deleted_cnt; --entry_cnt; @@ -195,9 +195,9 @@ __stat_page_col_var( /* Walk any append list. */ WT_SKIP_FOREACH(ins, WT_COL_APPEND(page)) { - if (WT_UPDATE_RESERVED_ISSET(ins->upd)) + if (ins->upd->type == WT_UPDATE_RESERVED) continue; - if (WT_UPDATE_DELETED_ISSET(ins->upd)) + if (ins->upd->type == WT_UPDATE_DELETED) ++deleted_cnt; else ++entry_cnt; @@ -268,8 +268,8 @@ __stat_page_row_leaf( * key on the page. */ WT_SKIP_FOREACH(ins, WT_ROW_INSERT_SMALLEST(page)) - if (!WT_UPDATE_DELETED_ISSET(ins->upd) && - !WT_UPDATE_RESERVED_ISSET(ins->upd)) + if (ins->upd->type != WT_UPDATE_DELETED && + ins->upd->type != WT_UPDATE_RESERVED) ++entry_cnt; /* @@ -279,8 +279,8 @@ __stat_page_row_leaf( WT_ROW_FOREACH(page, rip, i) { upd = WT_ROW_UPDATE(page, rip); if (upd == NULL || - (!WT_UPDATE_DELETED_ISSET(upd) && - !WT_UPDATE_RESERVED_ISSET(upd))) + (upd->type != WT_UPDATE_DELETED && + upd->type != WT_UPDATE_RESERVED)) ++entry_cnt; if (upd == NULL && (cell = __wt_row_leaf_value_cell(page, rip, NULL)) != NULL && @@ -289,8 +289,8 @@ __stat_page_row_leaf( /* Walk K/V pairs inserted after the on-page K/V pair. 
*/ WT_SKIP_FOREACH(ins, WT_ROW_INSERT(page, rip)) - if (!WT_UPDATE_DELETED_ISSET(ins->upd) && - !WT_UPDATE_RESERVED_ISSET(ins->upd)) + if (ins->upd->type != WT_UPDATE_DELETED && + ins->upd->type != WT_UPDATE_RESERVED) ++entry_cnt; } diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c index b45f369f1c2..c256f03a612 100644 --- a/src/btree/col_modify.c +++ b/src/btree/col_modify.c @@ -17,8 +17,7 @@ static int __col_insert_alloc( */ int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, - uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, - bool is_remove, bool is_reserve) + uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type) { static const WT_ITEM col_fix_remove = { "", 1, NULL, 0, 0 }; WT_BTREE *btree; @@ -38,13 +37,15 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, upd = upd_arg; append = logged = false; - if (is_remove || is_reserve) { + if (modify_type == WT_UPDATE_DELETED || + modify_type == WT_UPDATE_RESERVED) { /* * Fixed-size column-store doesn't have on-page deleted values, * it's a nul byte. */ - if (is_remove && btree->type == BTREE_COL_FIX) { - is_remove = false; + if (modify_type == WT_UPDATE_DELETED && + btree->type == BTREE_COL_FIX) { + modify_type = WT_UPDATE_STANDARD; value = &col_fix_remove; } } else { @@ -89,7 +90,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, /* Allocate a WT_UPDATE structure and transaction ID. 
*/ WT_ERR(__wt_update_alloc(session, - value, &upd, &upd_size, is_remove, is_reserve)); + value, &upd, &upd_size, modify_type)); WT_ERR(__wt_txn_modify(session, upd)); logged = true; @@ -150,7 +151,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, if (upd_arg == NULL) { WT_ERR(__wt_update_alloc(session, - value, &upd, &upd_size, is_remove, is_reserve)); + value, &upd, &upd_size, modify_type)); WT_ERR(__wt_txn_modify(session, upd)); logged = true; @@ -195,7 +196,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, } /* If the update was successful, add it to the in-memory log. */ - if (logged && !is_reserve) + if (logged && modify_type != WT_UPDATE_RESERVED) WT_ERR(__wt_txn_log_op(session, cbt)); if (0) { diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c index d3b087f92c6..2bf3c2f29bc 100644 --- a/src/btree/row_modify.c +++ b/src/btree/row_modify.c @@ -48,7 +48,7 @@ __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, - WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) + WT_UPDATE *upd_arg, u_int modify_type) { WT_DECL_RET; WT_INSERT *ins; @@ -97,7 +97,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, /* Allocate a WT_UPDATE structure and transaction ID. 
*/ WT_ERR(__wt_update_alloc(session, - value, &upd, &upd_size, is_remove, is_reserve)); + value, &upd, &upd_size, modify_type)); WT_ERR(__wt_txn_modify(session, upd)); logged = true; @@ -168,7 +168,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, if (upd_arg == NULL) { WT_ERR(__wt_update_alloc(session, - value, &upd, &upd_size, is_remove, is_reserve)); + value, &upd, &upd_size, modify_type)); WT_ERR(__wt_txn_modify(session, upd)); logged = true; @@ -207,7 +207,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, &ins, ins_size, skipdepth)); } - if (logged && !is_reserve) + if (logged && modify_type != WT_UPDATE_RESERVED) WT_ERR(__wt_txn_log_op(session, cbt)); if (0) { @@ -261,7 +261,7 @@ __wt_row_insert_alloc(WT_SESSION_IMPL *session, */ int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, - WT_UPDATE **updp, size_t *sizep, bool is_remove, bool is_reserve) + WT_UPDATE **updp, size_t *sizep, u_int modify_type) { WT_UPDATE *upd; @@ -271,13 +271,10 @@ __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, * Allocate the WT_UPDATE structure and room for the value, then copy * the value into place. 
*/ - if (is_remove || is_reserve) { + if (modify_type == WT_UPDATE_DELETED || + modify_type == WT_UPDATE_RESERVED) WT_RET(__wt_calloc(session, 1, sizeof(WT_UPDATE), &upd)); - if (is_remove) - WT_UPDATE_DELETED_SET(upd); - if (is_reserve) - WT_UPDATE_RESERVED_SET(upd); - } else { + else { WT_RET(__wt_calloc( session, 1, sizeof(WT_UPDATE) + value->size, &upd)); if (value->size != 0) { @@ -285,6 +282,7 @@ __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, memcpy(WT_UPDATE_DATA(upd), value->data, value->size); } } + upd->type = (uint8_t)modify_type; *updp = upd; *sizep = WT_UPDATE_MEMSIZE(upd); diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c index a30cb6f0e17..60750b88900 100644 --- a/src/cursor/cur_backup.c +++ b/src/cursor/cur_backup.c @@ -119,6 +119,7 @@ __wt_curbackup_open(WT_SESSION_IMPL *session, __wt_cursor_notsup, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __wt_cursor_notsup, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_config.c b/src/cursor/cur_config.c index a0b87b2b3c6..6c198315e33 100644 --- a/src/cursor/cur_config.c +++ b/src/cursor/cur_config.c @@ -39,6 +39,7 @@ __wt_curconfig_open(WT_SESSION_IMPL *session, __wt_cursor_notsup, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __wt_cursor_notsup, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_ds.c b/src/cursor/cur_ds.c index bb7a7a9994a..10de133be75 100644 --- a/src/cursor/cur_ds.c +++ b/src/cursor/cur_ds.c @@ -458,6 +458,7 @@ __wt_curds_open( __curds_search, /* search */ __curds_search_near, /* search-near */ __curds_insert, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __curds_update, /* update */ __curds_remove, /* remove */ __wt_cursor_notsup, /* reserve */ diff 
--git a/src/cursor/cur_dump.c b/src/cursor/cur_dump.c index 73328da6246..3e90d321db6 100644 --- a/src/cursor/cur_dump.c +++ b/src/cursor/cur_dump.c @@ -369,6 +369,7 @@ __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) __curdump_search, /* search */ __curdump_search_near, /* search-near */ __curdump_insert, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __curdump_update, /* update */ __curdump_remove, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index f4d42802032..4469cac685d 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -352,6 +352,11 @@ __curfile_reserve(WT_CURSOR *cursor) WT_ERR(__wt_btcur_reserve(cbt)); + /* + * Reserve maintains a position and key, which doesn't match the library + * API, where reserve maintains a value. Fix the API by searching after + * each successful reserve operation. + */ WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); WT_ASSERT(session, F_MASK(cursor, WT_CURSTD_VALUE_SET) == 0); @@ -430,6 +435,7 @@ __curfile_create(WT_SESSION_IMPL *session, __curfile_search, /* search */ __curfile_search_near, /* search-near */ __curfile_insert, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __curfile_update, /* update */ __curfile_remove, /* remove */ __curfile_reserve, /* reserve */ diff --git a/src/cursor/cur_index.c b/src/cursor/cur_index.c index fcf00e4fa03..e8fcb1b2702 100644 --- a/src/cursor/cur_index.c +++ b/src/cursor/cur_index.c @@ -449,6 +449,7 @@ __wt_curindex_open(WT_SESSION_IMPL *session, __curindex_search, /* search */ __curindex_search_near, /* search-near */ __wt_cursor_notsup, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c index cebf8a7fd6e..e4ccb90139e 100644 --- a/src/cursor/cur_join.c +++ b/src/cursor/cur_join.c @@ 
-591,6 +591,7 @@ __curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry, __wt_cursor_notsup, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __curjoin_extract_insert, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ @@ -1293,6 +1294,7 @@ __wt_curjoin_open(WT_SESSION_IMPL *session, __wt_cursor_notsup, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __wt_cursor_notsup, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_log.c b/src/cursor/cur_log.c index c8dc44bb392..38e9d4a1784 100644 --- a/src/cursor/cur_log.c +++ b/src/cursor/cur_log.c @@ -342,6 +342,7 @@ __wt_curlog_open(WT_SESSION_IMPL *session, __curlog_search, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __wt_cursor_notsup, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_metadata.c b/src/cursor/cur_metadata.c index 9a38996d4ce..d9aeed1fccd 100644 --- a/src/cursor/cur_metadata.c +++ b/src/cursor/cur_metadata.c @@ -550,6 +550,7 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session, __curmetadata_search, /* search */ __curmetadata_search_near, /* search-near */ __curmetadata_insert, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __curmetadata_update, /* update */ __curmetadata_remove, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c index 0bfe5679677..a1ec1d75918 100644 --- a/src/cursor/cur_stat.c +++ b/src/cursor/cur_stat.c @@ -576,6 +576,7 @@ __wt_curstat_open(WT_SESSION_IMPL *session, __curstat_search, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __wt_cursor_notsup, /* insert */ + 
__wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c index e42c5c7766e..91995ab0e0a 100644 --- a/src/cursor/cur_std.c +++ b/src/cursor/cur_std.c @@ -89,6 +89,19 @@ __wt_cursor_equals_notsup(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) return (__wt_cursor_notsup(cursor)); } +/* + * __wt_cursor_modify_notsup -- + * Unsupported cursor modify. + */ +int +__wt_cursor_modify_notsup(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) +{ + WT_UNUSED(entries); + WT_UNUSED(nentries); + + return (__wt_cursor_notsup(cursor)); +} + /* * __wt_cursor_search_near_notsup -- * Unsupported cursor search-near. @@ -581,6 +594,100 @@ err: API_END(session, ret); return (ret); } +/* + * __cursor_modify -- + * WT_CURSOR->modify default implementation. + */ +static int +__cursor_modify(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + WT_DECL_ITEM(ta); + WT_DECL_ITEM(tb); + WT_DECL_ITEM(tmp); + size_t len, size; + int i; + + CURSOR_UPDATE_API_CALL(cursor, session, modify); + WT_ERR(__cursor_checkkey(cursor)); + + /* Check for a rational modify vector count. */ + if (nentries <= 0) + WT_ERR_MSG( + session, EINVAL, "Illegal modify vector of %d", nentries); + + WT_STAT_CONN_INCR(session, cursor_modify); + WT_STAT_DATA_INCR(session, cursor_modify); + + /* Acquire position and value. */ + WT_ERR(cursor->search(cursor)); + + /* + * Process the entries to figure out how large a buffer we need. This is + * a bit pessimistic because we're ignoring replacement bytes, but it's + * a simpler calculation. + */ + for (size = cursor->value.size, i = 0; i < nentries; ++i) { + if (entries[i].offset >= size) + size = entries[i].offset; + size += entries[i].data.size; + } + + /* Allocate a pair of buffers. 
*/ + WT_ERR(__wt_scr_alloc(session, size, &ta)); + WT_ERR(__wt_scr_alloc(session, size, &tb)); + + /* Apply the change vector to the value. */ + WT_ERR(__wt_buf_set( + session, ta, cursor->value.data, cursor->value.size)); + for (i = 0; i < nentries; ++i) { + /* Take leading bytes from the original, plus any gap bytes. */ + if (entries[i].offset >= ta->size) { + memcpy(tb->mem, ta->mem, ta->size); + if (entries[i].offset > ta->size) + memset((uint8_t *)tb->mem + ta->size, + '\0', entries[i].offset - ta->size); + } else + if (entries[i].offset > 0) + memcpy(tb->mem, ta->mem, entries[i].offset); + tb->size = entries[i].offset; + + /* Take replacement bytes. */ + if (entries[i].data.size > 0) { + memcpy((uint8_t *)tb->mem + tb->size, + entries[i].data.data, entries[i].data.size); + tb->size += entries[i].data.size; + } + + /* Take trailing bytes from the original. */ + len = entries[i].offset + entries[i].size; + if (ta->size > len) { + memcpy((uint8_t *)tb->mem + tb->size, + (uint8_t *)ta->mem + len, ta->size - len); + tb->size += ta->size - len; + } + WT_ASSERT(session, tb->size <= size); + + tmp = ta; + ta = tb; + tb = tmp; + } + + /* Set the cursor's value. */ + ta->data = ta->mem; + cursor->set_value(cursor, ta); + + /* We know both key and value are set, "overwrite" doesn't matter. */ + ret = cursor->update(cursor); + +err: __wt_scr_free(session, &ta); + __wt_scr_free(session, &tb); + + CURSOR_UPDATE_API_END(session, ret); + return (ret); +} + /* * __wt_cursor_reconfigure -- * Set runtime-configurable settings. @@ -756,6 +863,14 @@ __wt_cursor_init(WT_CURSOR *cursor, if (cval.val != 0) F_SET(cursor, WT_CURSTD_RAW); + /* + * WT_CURSOR.modify supported on 'u' value formats, but may have been + * already initialized. 
+ */ + if (WT_STREQ(cursor->value_format, "u") && + cursor->modify == __wt_cursor_modify_notsup) + cursor->modify = __cursor_modify; + /* * Cursors that are internal to some other cursor (such as file cursors * inside a table cursor) should be closed after the containing cursor. diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c index 89c98986c0f..3959d58476b 100644 --- a/src/cursor/cur_table.c +++ b/src/cursor/cur_table.c @@ -91,6 +91,7 @@ __wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx, __wt_cursor_notsup, /* search */ __wt_cursor_search_near_notsup, /* search-near */ __curextract_insert, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __wt_cursor_notsup, /* update */ __wt_cursor_notsup, /* remove */ __wt_cursor_notsup, /* reserve */ @@ -949,6 +950,7 @@ __wt_curtable_open(WT_SESSION_IMPL *session, __curtable_search, /* search */ __curtable_search_near, /* search-near */ __curtable_insert, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __curtable_update, /* update */ __curtable_remove, /* remove */ __curtable_reserve, /* reserve */ diff --git a/src/docs/Doxyfile b/src/docs/Doxyfile index 3d8c46962f1..e7382e2bc5e 100644 --- a/src/docs/Doxyfile +++ b/src/docs/Doxyfile @@ -1582,6 +1582,7 @@ PREDEFINED = DOXYGEN \ __wt_file_system:=WT_FILE_SYSTEM \ __wt_item:=WT_ITEM \ __wt_lsn:=WT_LSN \ + __wt_modify:=WT_MODIFY \ __wt_session:=WT_SESSION \ __wt_txn_notify:=WT_TXN_NOTIFY \ WT_HANDLE_CLOSED(x):=x \ diff --git a/src/include/btmem.h b/src/include/btmem.h index 6755db81007..4e8d3c05d7d 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -208,7 +208,7 @@ struct __wt_ovfl_txnc { */ #define WT_LAS_FORMAT \ "key_format=" WT_UNCHECKED_STRING(IuQQu) \ - ",value_format=" WT_UNCHECKED_STRING(QIu) + ",value_format=" WT_UNCHECKED_STRING(QBu) /* * WT_PAGE_MODIFY -- @@ -809,11 +809,11 @@ struct __wt_row { /* On-page key, on-page cell, or off-page WT_IKEY */ * Walk the entries of an in-memory row-store leaf page. 
*/ #define WT_ROW_FOREACH(page, rip, i) \ - for ((i) = (page)->entries, \ + for ((i) = (page)->entries, \ (rip) = (page)->pg_row; (i) > 0; ++(rip), --(i)) #define WT_ROW_FOREACH_REVERSE(page, rip, i) \ - for ((i) = (page)->entries, \ - (rip) = (page)->pg_row + ((page)->entries - 1); \ + for ((i) = (page)->entries, \ + (rip) = (page)->pg_row + ((page)->entries - 1); \ (i) > 0; --(rip), --(i)) /* @@ -861,7 +861,7 @@ struct __wt_col { * Walk the entries of variable-length column-store leaf page. */ #define WT_COL_FOREACH(page, cip, i) \ - for ((i) = (page)->entries, \ + for ((i) = (page)->entries, \ (cip) = (page)->pg_var; (i) > 0; ++(cip), --(i)) /* @@ -908,23 +908,16 @@ struct __wt_ikey { * list. */ WT_PACKED_STRUCT_BEGIN(__wt_update) - uint64_t txnid; /* update transaction */ + uint64_t txnid; /* transaction */ WT_UPDATE *next; /* forward-linked list */ - /* - * Use the maximum size and maximum size-1 as is-deleted and is-reserved - * flags (which means we can't store 4GB objects), instead of increasing - * the size of this structure for a flag bit. - */ -#define WT_UPDATE_DELETED_VALUE UINT32_MAX -#define WT_UPDATE_DELETED_SET(u) ((u)->size = WT_UPDATE_DELETED_VALUE) -#define WT_UPDATE_DELETED_ISSET(u) ((u)->size == WT_UPDATE_DELETED_VALUE) + uint32_t size; /* data length */ -#define WT_UPDATE_RESERVED_VALUE (UINT32_MAX - 1) -#define WT_UPDATE_RESERVED_SET(u) ((u)->size = WT_UPDATE_RESERVED_VALUE) -#define WT_UPDATE_RESERVED_ISSET(u) ((u)->size == WT_UPDATE_RESERVED_VALUE) - uint32_t size; /* update length */ +#define WT_UPDATE_STANDARD 0 +#define WT_UPDATE_DELETED 1 +#define WT_UPDATE_RESERVED 2 + uint8_t type; /* type (one byte to conserve memory) */ /* The untyped value immediately follows the WT_UPDATE structure. */ #define WT_UPDATE_DATA(upd) \ @@ -936,9 +929,13 @@ WT_PACKED_STRUCT_BEGIN(__wt_update) * cache overhead calculation. 
*/ #define WT_UPDATE_MEMSIZE(upd) \ - WT_ALIGN(sizeof(WT_UPDATE) + (WT_UPDATE_DELETED_ISSET(upd) || \ - WT_UPDATE_RESERVED_ISSET(upd) ? 0 : (upd)->size), 32) + WT_ALIGN(sizeof(WT_UPDATE) + (upd)->size, 32) WT_PACKED_STRUCT_END +/* + * WT_UPDATE_SIZE is the expected structure size -- we verify the build to + * ensure the compiler hasn't inserted padding. + */ +#define WT_UPDATE_SIZE 21 /* * WT_INSERT -- diff --git a/src/include/cursor.h b/src/include/cursor.h index b044329fbfe..8d2f2c80c2a 100644 --- a/src/include/cursor.h +++ b/src/include/cursor.h @@ -22,6 +22,7 @@ search, \ search_near, \ insert, \ + modify, \ update, \ remove, \ reserve, \ @@ -44,6 +45,7 @@ search, \ search_near, \ insert, \ + modify, \ update, \ remove, \ reserve, \ diff --git a/src/include/extern.h b/src/include/extern.h index a3ce0f3746f..01c21b188c0 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -180,7 +180,7 @@ extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *b extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tree_walk_skip( WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) 
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_leaf_key_copy( WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -189,9 +189,9 @@ extern int __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, c extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_ikey(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, const WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp, size_t *sizep, bool is_remove, bool is_reserve) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp, size_t *sizep, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd); extern 
void __wt_update_obsolete_free( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd); extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -314,6 +314,7 @@ extern void __wt_cursor_set_key_notsup(WT_CURSOR *cursor, ...); extern void __wt_cursor_set_value_notsup(WT_CURSOR *cursor, ...); extern int __wt_cursor_compare_notsup(WT_CURSOR *a, WT_CURSOR *b, int *cmpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_cursor_equals_notsup(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_cursor_modify_notsup(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_cursor_search_near_notsup(WT_CURSOR *cursor, int *exact) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_cursor_reconfigure_notsup(WT_CURSOR *cursor, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_cursor_set_notsup(WT_CURSOR *cursor); diff --git a/src/include/log.h b/src/include/log.h index f80514a3546..e7bc28cd220 100644 --- a/src/include/log.h +++ b/src/include/log.h @@ -130,7 +130,7 @@ union __wt_lsn { #define WT_LOG_SLOT_FLAGS(state) ((state) & WT_LOG_SLOT_MASK_ON) #define WT_LOG_SLOT_JOINED(state) (((state) & WT_LOG_SLOT_MASK_OFF) >> 32) #define WT_LOG_SLOT_JOINED_BUFFERED(state) \ - (WT_LOG_SLOT_JOINED(state) & \ + (WT_LOG_SLOT_JOINED(state) & \ (WT_LOG_SLOT_UNBUFFERED - 1)) #define WT_LOG_SLOT_JOIN_REL(j, r, s) (((j) << 32) + (r) + (s)) #define WT_LOG_SLOT_RELEASED(state) ((int64_t)(int32_t)(state)) diff --git a/src/include/lsm.h b/src/include/lsm.h index 08313438eb8..f8d0f480cbb 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -240,11 +240,11 @@ struct __wt_lsm_tree { * area, copying them into place when a statistics cursor is created. 
*/ #define WT_LSM_TREE_STAT_INCR(session, fld) do { \ - if (WT_STAT_ENABLED(session)) \ + if (WT_STAT_ENABLED(session)) \ ++(fld); \ } while (0) #define WT_LSM_TREE_STAT_INCRV(session, fld, v) do { \ - if (WT_STAT_ENABLED(session)) \ + if (WT_STAT_ENABLED(session)) \ (fld) += (int64_t)(v); \ } while (0) int64_t bloom_false_positive; diff --git a/src/include/mutex.h b/src/include/mutex.h index c0e25ebb295..00babd47fbf 100644 --- a/src/include/mutex.h +++ b/src/include/mutex.h @@ -66,8 +66,8 @@ struct __wt_spinlock { WT_CACHE_LINE_PAD_BEGIN #if SPINLOCK_TYPE == SPINLOCK_GCC volatile int lock; -#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ - SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE ||\ +#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \ + SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE || \ SPINLOCK_TYPE == SPINLOCK_MSVC wt_mutex_t lock; #else diff --git a/src/include/mutex.i b/src/include/mutex.i index 44b8494cdbf..5b14bb24730 100644 --- a/src/include/mutex.i +++ b/src/include/mutex.i @@ -102,8 +102,8 @@ __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t) __sync_lock_release(&t->lock); } -#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ - SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE +#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \ + SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE /* * __wt_spin_init -- @@ -142,8 +142,8 @@ __wt_spin_destroy(WT_SESSION_IMPL *session, WT_SPINLOCK *t) } } -#if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\ - SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE +#if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \ + SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE /* * __wt_spin_trylock -- diff --git a/src/include/schema.h b/src/include/schema.h index fa836084834..8b8ee5616d1 100644 --- a/src/include/schema.h +++ b/src/include/schema.h @@ -323,7 +323,7 @@ struct __wt_table { F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_READ); \ } \ if (__handle_write_locked) { \ - __wt_writelock(session, &__conn->dhandle_lock); \ + 
__wt_writelock(session, &__conn->dhandle_lock); \ F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE); \ } \ } while (0) diff --git a/src/include/stat.h b/src/include/stat.h index beb589dc0ef..fa62cf27693 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -374,9 +374,11 @@ struct __wt_connection_stats { int64_t write_io; int64_t cursor_create; int64_t cursor_insert; + int64_t cursor_modify; int64_t cursor_next; int64_t cursor_prev; int64_t cursor_remove; + int64_t cursor_reserve; int64_t cursor_reset; int64_t cursor_restart; int64_t cursor_search; @@ -609,9 +611,11 @@ struct __wt_dsrc_stats { int64_t cursor_remove_bytes; int64_t cursor_update_bytes; int64_t cursor_insert; + int64_t cursor_modify; int64_t cursor_next; int64_t cursor_prev; int64_t cursor_remove; + int64_t cursor_reserve; int64_t cursor_reset; int64_t cursor_restart; int64_t cursor_search; diff --git a/src/include/txn.i b/src/include/txn.i index 4b6ba17853f..f7321af5b12 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -235,7 +235,7 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd) { /* Skip reserved place-holders, they're never visible. */ for (; upd != NULL; upd = upd->next) - if (!WT_UPDATE_RESERVED_ISSET(upd) && + if (upd->type != WT_UPDATE_RESERVED && __wt_txn_visible(session, upd->txnid)) break; diff --git a/src/include/verify_build.h b/src/include/verify_build.h index d2ccf206990..e93f5931c21 100644 --- a/src/include/verify_build.h +++ b/src/include/verify_build.h @@ -52,6 +52,7 @@ __wt_verify_build(void) /* Check specific structures weren't padded. */ WT_SIZE_CHECK(WT_BLOCK_DESC, WT_BLOCK_DESC_SIZE); WT_SIZE_CHECK(WT_REF, WT_REF_SIZE); + WT_SIZE_CHECK(WT_UPDATE, WT_UPDATE_SIZE); /* Check specific structures were padded. 
*/ #define WT_PADDING_CHECK(s) \ diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index b93fbebef25..5e76b2915b1 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -49,12 +49,6 @@ extern "C" { #define WT_ATTRIBUTE_LIBRARY_VISIBLE __attribute__((visibility("default"))) #endif -#ifdef SWIG -%{ -#include -%} -#endif - /*! * @defgroup wt WiredTiger API * The functions, handles and methods applications use to access and manage @@ -84,6 +78,7 @@ struct __wt_extractor; typedef struct __wt_extractor WT_EXTRACTOR; struct __wt_file_handle; typedef struct __wt_file_handle WT_FILE_HANDLE; struct __wt_file_system; typedef struct __wt_file_system WT_FILE_SYSTEM; struct __wt_item; typedef struct __wt_item WT_ITEM; +struct __wt_modify; typedef struct __wt_modify WT_MODIFY; struct __wt_session; typedef struct __wt_session WT_SESSION; #if defined(SWIGJAVA) @@ -137,6 +132,43 @@ struct __wt_item { #endif }; +/*! + * A set of modifications for a value, including a pointer to new data and a + * length, plus a target offset in the value and an optional length of data + * in the value to be replaced. + * + * WT_MODIFY structures do not need to be cleared before use. + */ +struct __wt_modify { + /*! + * New data. The size of the new data may be zero when no new data is + * provided. + */ + WT_ITEM data; + + /*! + * The zero-based byte offset in the value where the new data is placed. + * + * If the offset is past the end of the value, nul bytes are appended to + * the value up to the specified offset. + */ + size_t offset; + + /*! + * The number of bytes in the value to be replaced. + * + * If the size is zero, no bytes from the value are replaced and the new + * data is inserted. + * + * If the offset is past the end of the value, the size is ignored. + * + * If the offset plus the size overlaps the end of the previous value, + * bytes from the offset to the end of the value are replaced and any + * remaining new data is appended. 
+ */ + size_t size; +}; + /*! * The maximum packed size of a 64-bit integer. The ::wiredtiger_struct_pack * function will pack single long integers into at most this many bytes. @@ -445,6 +477,38 @@ struct __wt_cursor { */ int __F(insert)(WT_CURSOR *cursor); + /*! + * Modify an existing record. + * + * Both the key and value must be set and the record must already exist; + * the record will be updated. + * + * Modification structures are applied in order, and later modifications + * can update earlier modifications. + * + * The modify method is only supported on raw byte arrays accessed using + * a WT_ITEM structure, that is, a format type of \c u. + * + * @snippet ex_all.c Modify an existing record + * + * On success, the cursor ends positioned at the modified record; to + * minimize cursor resources, the WT_CURSOR::reset method should be + * called as soon as the cursor no longer needs that position. + * + * The maximum length of a single column stored in a table is not fixed + * (as it partially depends on the underlying file configuration), but + * is always a small number of bytes less than 4GB. + * + * @param cursor the cursor handle + * @param entries an array of modification data structures + * @param nentries the number of modification data structures + * @errors + * In particular, if \c in_memory is configured for the database and + * the modify requires more than the configured cache size to complete, + * ::WT_CACHE_FULL is returned. + */ + int __F(modify)(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries); + /*! * Update an existing record and optionally insert a record. * @@ -474,7 +538,7 @@ struct __wt_cursor { * @errors * In particular, if \c overwrite=false is configured and no record with * the specified key exists, ::WT_NOTFOUND is returned. 
- * Also, if \c in_memory is configured for the database and the insert + * Also, if \c in_memory is configured for the database and the update * requires more than the configured cache size to complete, * ::WT_CACHE_FULL is returned. */ @@ -4614,292 +4678,296 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CURSOR_CREATE 1115 /*! cursor: cursor insert calls */ #define WT_STAT_CONN_CURSOR_INSERT 1116 +/*! cursor: cursor modify calls */ +#define WT_STAT_CONN_CURSOR_MODIFY 1117 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1117 +#define WT_STAT_CONN_CURSOR_NEXT 1118 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1118 +#define WT_STAT_CONN_CURSOR_PREV 1119 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1119 +#define WT_STAT_CONN_CURSOR_REMOVE 1120 +/*! cursor: cursor reserve calls */ +#define WT_STAT_CONN_CURSOR_RESERVE 1121 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1120 +#define WT_STAT_CONN_CURSOR_RESET 1122 /*! cursor: cursor restarted searches */ -#define WT_STAT_CONN_CURSOR_RESTART 1121 +#define WT_STAT_CONN_CURSOR_RESTART 1123 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1122 +#define WT_STAT_CONN_CURSOR_SEARCH 1124 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1123 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1125 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1124 +#define WT_STAT_CONN_CURSOR_UPDATE 1126 /*! cursor: truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1125 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1127 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1126 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1128 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1127 +#define WT_STAT_CONN_DH_SWEEP_REF 1129 /*! 
data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1128 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1130 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1129 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1131 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1130 +#define WT_STAT_CONN_DH_SWEEP_TOD 1132 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1131 +#define WT_STAT_CONN_DH_SWEEPS 1133 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1132 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1134 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1133 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1135 /*! lock: checkpoint lock acquisitions */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1134 +#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1136 /*! lock: checkpoint lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1135 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1137 /*! lock: checkpoint lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1136 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1138 /*! lock: handle-list lock eviction thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1137 +#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1139 /*! lock: metadata lock acquisitions */ -#define WT_STAT_CONN_LOCK_METADATA_COUNT 1138 +#define WT_STAT_CONN_LOCK_METADATA_COUNT 1140 /*! lock: metadata lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1139 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1141 /*! lock: metadata lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1140 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1142 /*! 
lock: schema lock acquisitions */ -#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1141 +#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1143 /*! lock: schema lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1142 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1144 /*! lock: schema lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1143 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1145 /*! lock: table lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_COUNT 1144 +#define WT_STAT_CONN_LOCK_TABLE_COUNT 1146 /*! * lock: table lock application thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1145 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1147 /*! * lock: table lock internal thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1146 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1148 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1147 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1149 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1148 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1150 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1149 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1151 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1150 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1152 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1151 +#define WT_STAT_CONN_LOG_FLUSH 1153 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1152 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1154 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1153 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1155 /*! 
log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1154 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1156 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1155 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1157 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1156 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1158 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1157 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1159 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1158 +#define WT_STAT_CONN_LOG_SCANS 1160 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1159 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1161 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1160 +#define WT_STAT_CONN_LOG_WRITE_LSN 1162 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1161 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1163 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1162 +#define WT_STAT_CONN_LOG_SYNC 1164 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1163 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1165 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1164 +#define WT_STAT_CONN_LOG_SYNC_DIR 1166 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1165 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1167 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1166 +#define WT_STAT_CONN_LOG_WRITES 1168 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1167 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1169 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1168 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1170 /*! 
log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1169 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1171 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1170 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1172 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1171 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1173 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1172 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1174 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1173 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1175 /*! log: slot close lost race */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1174 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1176 /*! log: slot close unbuffered waits */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1175 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1177 /*! log: slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1176 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1178 /*! log: slot join atomic update races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1177 +#define WT_STAT_CONN_LOG_SLOT_RACES 1179 /*! log: slot join calls atomic updates raced */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1178 +#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1180 /*! log: slot join calls did not yield */ -#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1179 +#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1181 /*! log: slot join calls found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1180 +#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1182 /*! log: slot join calls slept */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1181 +#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1183 /*! log: slot join calls yielded */ -#define WT_STAT_CONN_LOG_SLOT_YIELD 1182 +#define WT_STAT_CONN_LOG_SLOT_YIELD 1184 /*! 
log: slot join found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1183 +#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1185 /*! log: slot joins yield time (usecs) */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1184 +#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1186 /*! log: slot transitions unable to find free slot */ -#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1185 +#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1187 /*! log: slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1186 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1188 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1187 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1189 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1188 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1190 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1189 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1191 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1190 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1192 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1191 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1193 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1192 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1194 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1193 +#define WT_STAT_CONN_REC_PAGES 1195 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1194 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1196 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1195 +#define WT_STAT_CONN_REC_PAGE_DELETE 1197 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1196 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1198 /*! 
reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1197 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1199 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1198 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1200 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1199 +#define WT_STAT_CONN_SESSION_OPEN 1201 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1200 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1202 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1201 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1203 /*! session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1202 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1204 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1203 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1205 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1204 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1206 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1205 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1207 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1206 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1208 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1207 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1209 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1208 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1210 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1209 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1211 /*! 
session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1210 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1212 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1211 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1213 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1212 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1214 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1213 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1215 /*! session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1214 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1216 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1215 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1217 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1216 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1218 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1217 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1219 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1218 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1220 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1219 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1221 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1220 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1222 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1221 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1223 /*! 
thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1222 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1224 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1223 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1225 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1224 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1226 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1225 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1227 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1226 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1228 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1227 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1229 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1228 +#define WT_STAT_CONN_PAGE_SLEEP 1230 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1229 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1231 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1230 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1232 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1231 +#define WT_STAT_CONN_TXN_BEGIN 1233 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1232 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1234 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1233 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1235 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1234 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1236 /*! 
transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1235 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1237 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1236 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1238 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1237 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1239 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1238 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1240 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1239 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1241 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1240 +#define WT_STAT_CONN_TXN_CHECKPOINT 1242 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1241 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1243 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1242 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1244 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1243 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1245 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1244 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1246 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1245 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1247 /*! 
transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1246 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1248 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1247 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1249 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1248 +#define WT_STAT_CONN_TXN_SYNC 1250 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1249 +#define WT_STAT_CONN_TXN_COMMIT 1251 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1250 +#define WT_STAT_CONN_TXN_ROLLBACK 1252 /*! * @} @@ -5181,61 +5249,65 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2092 /*! cursor: insert calls */ #define WT_STAT_DSRC_CURSOR_INSERT 2093 +/*! cursor: modify calls */ +#define WT_STAT_DSRC_CURSOR_MODIFY 2094 /*! cursor: next calls */ -#define WT_STAT_DSRC_CURSOR_NEXT 2094 +#define WT_STAT_DSRC_CURSOR_NEXT 2095 /*! cursor: prev calls */ -#define WT_STAT_DSRC_CURSOR_PREV 2095 +#define WT_STAT_DSRC_CURSOR_PREV 2096 /*! cursor: remove calls */ -#define WT_STAT_DSRC_CURSOR_REMOVE 2096 +#define WT_STAT_DSRC_CURSOR_REMOVE 2097 +/*! cursor: reserve calls */ +#define WT_STAT_DSRC_CURSOR_RESERVE 2098 /*! cursor: reset calls */ -#define WT_STAT_DSRC_CURSOR_RESET 2097 +#define WT_STAT_DSRC_CURSOR_RESET 2099 /*! cursor: restarted searches */ -#define WT_STAT_DSRC_CURSOR_RESTART 2098 +#define WT_STAT_DSRC_CURSOR_RESTART 2100 /*! cursor: search calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH 2099 +#define WT_STAT_DSRC_CURSOR_SEARCH 2101 /*! cursor: search near calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2100 +#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2102 /*! cursor: truncate calls */ -#define WT_STAT_DSRC_CURSOR_TRUNCATE 2101 +#define WT_STAT_DSRC_CURSOR_TRUNCATE 2103 /*! 
cursor: update calls */ -#define WT_STAT_DSRC_CURSOR_UPDATE 2102 +#define WT_STAT_DSRC_CURSOR_UPDATE 2104 /*! reconciliation: dictionary matches */ -#define WT_STAT_DSRC_REC_DICTIONARY 2103 +#define WT_STAT_DSRC_REC_DICTIONARY 2105 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2104 +#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2106 /*! * reconciliation: internal page key bytes discarded using suffix * compression */ -#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2105 +#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2107 /*! reconciliation: internal page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2106 +#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2108 /*! reconciliation: internal-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2107 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2109 /*! reconciliation: leaf page key bytes discarded using prefix compression */ -#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2108 +#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2110 /*! reconciliation: leaf page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2109 +#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2111 /*! reconciliation: leaf-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2110 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2112 /*! reconciliation: maximum blocks required for a page */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2111 +#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2113 /*! reconciliation: overflow values written */ -#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2112 +#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2114 /*! reconciliation: page checksum matches */ -#define WT_STAT_DSRC_REC_PAGE_MATCH 2113 +#define WT_STAT_DSRC_REC_PAGE_MATCH 2115 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_DSRC_REC_PAGES 2114 +#define WT_STAT_DSRC_REC_PAGES 2116 /*! 
reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_DSRC_REC_PAGES_EVICTION 2115 +#define WT_STAT_DSRC_REC_PAGES_EVICTION 2117 /*! reconciliation: pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE 2116 +#define WT_STAT_DSRC_REC_PAGE_DELETE 2118 /*! session: object compaction */ -#define WT_STAT_DSRC_SESSION_COMPACT 2117 +#define WT_STAT_DSRC_SESSION_COMPACT 2119 /*! session: open cursor count */ -#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2118 +#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2120 /*! transaction: update conflicts */ -#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2119 +#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2121 /*! * @} diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 90750a27ab3..99920367600 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -1756,6 +1756,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session, __clsm_search, /* search */ __clsm_search_near, /* search-near */ __clsm_insert, /* insert */ + __wt_cursor_modify_notsup, /* modify */ __clsm_update, /* update */ __clsm_remove, /* remove */ __clsm_reserve, /* reserve */ diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 52a279b8c96..8f7769766a9 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -1227,7 +1227,8 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, } /* Reconciliation should never see a reserved update. */ - WT_ASSERT(session, *updp == NULL || !WT_UPDATE_RESERVED_ISSET(*updp)); + WT_ASSERT(session, + *updp == NULL || (*updp)->type != WT_UPDATE_RESERVED); /* * If all of the updates were aborted, quit. This test is not strictly @@ -1411,14 +1412,14 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, * place a deleted record at the end of the update list. 
*/ if (vpack == NULL || vpack->type == WT_CELL_DEL) - WT_RET(__wt_update_alloc( - session, NULL, &append, ¬used, true, false)); + WT_RET(__wt_update_alloc(session, + NULL, &append, ¬used, WT_UPDATE_DELETED)); else { WT_RET(__wt_scr_alloc(session, 0, &tmp)); if ((ret = __wt_page_cell_data_ref( session, page, vpack, tmp)) == 0) ret = __wt_update_alloc(session, - tmp, &append, ¬used, false, false); + tmp, &append, ¬used, WT_UPDATE_STANDARD); __wt_scr_free(session, &tmp); WT_RET(ret); } @@ -3675,20 +3676,20 @@ __rec_update_las(WT_SESSION_IMPL *session, * restored, obviously. */ do { - if (WT_UPDATE_RESERVED_ISSET(upd)) + if (upd->type == WT_UPDATE_RESERVED) continue; cursor->set_key(cursor, btree_id, &las_addr, ++las_counter, list->onpage_txn, key); - if (WT_UPDATE_DELETED_ISSET(upd)) + if (upd->type == WT_UPDATE_DELETED) las_value.size = 0; else { las_value.data = WT_UPDATE_DATA(upd); las_value.size = upd->size; } cursor->set_value( - cursor, upd->txnid, upd->size, &las_value); + cursor, upd->txnid, upd->type, &las_value); WT_ERR(cursor->insert(cursor)); ++insert_cnt; @@ -4614,7 +4615,7 @@ record_loop: /* update_no_copy = true; /* No data copy */ repeat_count = 1; /* Single record */ - deleted = WT_UPDATE_DELETED_ISSET(upd); + deleted = upd->type == WT_UPDATE_DELETED; if (!deleted) { data = WT_UPDATE_DATA(upd); size = upd->size; @@ -4849,7 +4850,7 @@ compare: /* } } else { deleted = upd == NULL || - WT_UPDATE_DELETED_ISSET(upd); + upd->type == WT_UPDATE_DELETED; if (!deleted) { data = WT_UPDATE_DATA(upd); size = upd->size; @@ -5394,7 +5395,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session, __wt_ovfl_cache(session, page, rip, vpack)); /* If this key/value pair was deleted, we're done. 
*/ - if (WT_UPDATE_DELETED_ISSET(upd)) { + if (upd->type == WT_UPDATE_DELETED) { /* * Overflow keys referencing discarded values * are no longer useful, discard the backing @@ -5604,7 +5605,7 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) for (; ins != NULL; ins = WT_SKIP_NEXT(ins)) { /* Look for an update. */ WT_RET(__rec_txn_read(session, r, ins, NULL, NULL, &upd)); - if (upd == NULL || WT_UPDATE_DELETED_ISSET(upd)) + if (upd == NULL || upd->type == WT_UPDATE_DELETED) continue; if (upd->size == 0) /* Build value cell. */ diff --git a/src/support/stat.c b/src/support/stat.c index 8711e6b9bc1..bc40244f5e6 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -97,9 +97,11 @@ static const char * const __stats_dsrc_desc[] = { "cursor: cursor-remove key bytes removed", "cursor: cursor-update value bytes updated", "cursor: insert calls", + "cursor: modify calls", "cursor: next calls", "cursor: prev calls", "cursor: remove calls", + "cursor: reserve calls", "cursor: reset calls", "cursor: restarted searches", "cursor: search calls", @@ -259,9 +261,11 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats) stats->cursor_remove_bytes = 0; stats->cursor_update_bytes = 0; stats->cursor_insert = 0; + stats->cursor_modify = 0; stats->cursor_next = 0; stats->cursor_prev = 0; stats->cursor_remove = 0; + stats->cursor_reserve = 0; stats->cursor_reset = 0; stats->cursor_restart = 0; stats->cursor_search = 0; @@ -410,9 +414,11 @@ __wt_stat_dsrc_aggregate_single( to->cursor_remove_bytes += from->cursor_remove_bytes; to->cursor_update_bytes += from->cursor_update_bytes; to->cursor_insert += from->cursor_insert; + to->cursor_modify += from->cursor_modify; to->cursor_next += from->cursor_next; to->cursor_prev += from->cursor_prev; to->cursor_remove += from->cursor_remove; + to->cursor_reserve += from->cursor_reserve; to->cursor_reset += from->cursor_reset; to->cursor_restart += from->cursor_restart; to->cursor_search += from->cursor_search; @@ 
-588,9 +594,11 @@ __wt_stat_dsrc_aggregate( to->cursor_remove_bytes += WT_STAT_READ(from, cursor_remove_bytes); to->cursor_update_bytes += WT_STAT_READ(from, cursor_update_bytes); to->cursor_insert += WT_STAT_READ(from, cursor_insert); + to->cursor_modify += WT_STAT_READ(from, cursor_modify); to->cursor_next += WT_STAT_READ(from, cursor_next); to->cursor_prev += WT_STAT_READ(from, cursor_prev); to->cursor_remove += WT_STAT_READ(from, cursor_remove); + to->cursor_reserve += WT_STAT_READ(from, cursor_reserve); to->cursor_reset += WT_STAT_READ(from, cursor_reset); to->cursor_restart += WT_STAT_READ(from, cursor_restart); to->cursor_search += WT_STAT_READ(from, cursor_search); @@ -741,9 +749,11 @@ static const char * const __stats_connection_desc[] = { "connection: total write I/Os", "cursor: cursor create calls", "cursor: cursor insert calls", + "cursor: cursor modify calls", "cursor: cursor next calls", "cursor: cursor prev calls", "cursor: cursor remove calls", + "cursor: cursor reserve calls", "cursor: cursor reset calls", "cursor: cursor restarted searches", "cursor: cursor search calls", @@ -1034,9 +1044,11 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->write_io = 0; stats->cursor_create = 0; stats->cursor_insert = 0; + stats->cursor_modify = 0; stats->cursor_next = 0; stats->cursor_prev = 0; stats->cursor_remove = 0; + stats->cursor_reserve = 0; stats->cursor_reset = 0; stats->cursor_restart = 0; stats->cursor_search = 0; @@ -1347,9 +1359,11 @@ __wt_stat_connection_aggregate( to->write_io += WT_STAT_READ(from, write_io); to->cursor_create += WT_STAT_READ(from, cursor_create); to->cursor_insert += WT_STAT_READ(from, cursor_insert); + to->cursor_modify += WT_STAT_READ(from, cursor_modify); to->cursor_next += WT_STAT_READ(from, cursor_next); to->cursor_prev += WT_STAT_READ(from, cursor_prev); to->cursor_remove += WT_STAT_READ(from, cursor_remove); + to->cursor_reserve += WT_STAT_READ(from, cursor_reserve); to->cursor_reset += 
WT_STAT_READ(from, cursor_reset); to->cursor_restart += WT_STAT_READ(from, cursor_restart); to->cursor_search += WT_STAT_READ(from, cursor_search); diff --git a/src/txn/txn.c b/src/txn/txn.c index ac4be37f855..d9edbb80564 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -600,7 +600,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) * Switch reserved operations to abort to simplify * obsolete update list truncation. */ - if (WT_UPDATE_RESERVED_ISSET(op->u.upd)) + if (op->u.upd->type == WT_UPDATE_RESERVED) op->u.upd->txnid = WT_TXN_ABORTED; break; case WT_TXN_OP_REF: diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c index fae2027e1ec..74dc679a6ef 100644 --- a/src/txn/txn_log.c +++ b/src/txn/txn_log.c @@ -82,12 +82,12 @@ __txn_op_log(WT_SESSION_IMPL *session, * or update, all of which require log records. We shouldn't ever log * reserve operations. */ - WT_ASSERT(session, !WT_UPDATE_RESERVED_ISSET(upd)); + WT_ASSERT(session, upd->type != WT_UPDATE_RESERVED); if (cbt->btree->type == BTREE_ROW) { #ifdef HAVE_DIAGNOSTIC __txn_op_log_row_key_check(session, cbt); #endif - if (WT_UPDATE_DELETED_ISSET(upd)) + if (upd->type == WT_UPDATE_DELETED) WT_RET(__wt_logop_row_remove_pack( session, logrec, op->fileid, &cursor->key)); else @@ -97,7 +97,7 @@ __txn_op_log(WT_SESSION_IMPL *session, recno = WT_INSERT_RECNO(cbt->ins); WT_ASSERT(session, recno != WT_RECNO_OOB); - if (WT_UPDATE_DELETED_ISSET(upd)) + if (upd->type == WT_UPDATE_DELETED) WT_RET(__wt_logop_col_remove_pack( session, logrec, op->fileid, recno)); else diff --git a/test/csuite/scope/main.c b/test/csuite/scope/main.c index 8b9a79decd0..83d6bd479d9 100644 --- a/test/csuite/scope/main.c +++ b/test/csuite/scope/main.c @@ -28,7 +28,7 @@ #include "test_util.h" #define KEY "key" -#define VALUE "value" +#define VALUE "value,value,value" static int ignore_errors; @@ -63,44 +63,55 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) { struct { const char *op; - enum { INSERT, SEARCH, SEARCH_NEAR, + enum { 
INSERT, MODIFY, SEARCH, SEARCH_NEAR, REMOVE, REMOVE_POS, RESERVE, UPDATE } func; const char *config; } *op, ops[] = { /* - * The ops order is fixed and shouldn't change, that is, insert - * has to happen first so search, update and remove operations - * are possible, and remove has to be last. + * The ops order is specific: insert has to happen first so + * other operations are possible, and remove has to be last. */ { "insert", INSERT, NULL, }, { "search", SEARCH, NULL, }, { "search", SEARCH_NEAR, NULL, }, { "reserve", RESERVE, NULL, }, + { "insert", MODIFY, NULL, }, { "update", UPDATE, NULL, }, { "remove", REMOVE, NULL, }, { "remove", REMOVE_POS, NULL, }, { NULL, INSERT, NULL } }; WT_CURSOR *cursor; +#define MODIFY_ENTRIES 2 + WT_MODIFY entries[MODIFY_ENTRIES]; + WT_ITEM vu; uint64_t keyr; - const char *key, *value; + const char *key, *vs; char keybuf[100], valuebuf[100]; int exact; - bool recno; + bool recno, vstring; /* Reserve requires a running transaction. */ testutil_check(session->begin_transaction(session, NULL)); cursor = NULL; for (op = ops; op->op != NULL; op++) { - key = value = NULL; + key = vs = NULL; + memset(&vu, 0, sizeof(vu)); /* Open a cursor. */ if (cursor != NULL) testutil_check(cursor->close(cursor)); testutil_check(session->open_cursor( session, uri, NULL, op->config, &cursor)); + + /* Operations change based on the key/value formats. */ recno = strcmp(cursor->key_format, "r") == 0; + vstring = strcmp(cursor->value_format, "S") == 0; + + /* Modify is only possible with "item" values. 
*/ + if (vstring && op->func == MODIFY) + continue; /* * Set up application buffers so we can detect overwrites @@ -114,7 +125,12 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) cursor->set_key(cursor, keybuf); } strcpy(valuebuf, VALUE); - cursor->set_value(cursor, valuebuf); + if (vstring) + cursor->set_value(cursor, valuebuf); + else { + vu.size = strlen(vu.data = valuebuf); + cursor->set_value(cursor, &vu); + } /* * The application must keep key and value memory valid until @@ -127,6 +143,20 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) case INSERT: testutil_check(cursor->insert(cursor)); break; + case MODIFY: + /* Modify, but don't really change anything. */ + entries[0].data.data = &VALUE[0]; + entries[0].data.size = 2; + entries[0].offset = 0; + entries[0].size = 2; + entries[1].data.data = &VALUE[3]; + entries[1].data.size = 5; + entries[1].offset = 3; + entries[1].size = 5; + + testutil_check( + cursor->modify(cursor, entries, MODIFY_ENTRIES)); + break; case SEARCH: testutil_check(cursor->search(cursor)); break; @@ -180,7 +210,12 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) else testutil_assert( cursor->get_key(cursor, &key) != 0); - testutil_assert(cursor->get_value(cursor, &value) != 0); + if (vstring) + testutil_assert( + cursor->get_value(cursor, &vs) != 0); + else + testutil_assert( + cursor->get_value(cursor, &vu) != 0); testutil_assert(ignore_errors == 0); break; case REMOVE_POS: @@ -201,16 +236,22 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) testutil_assert(strcmp(key, KEY) == 0); } ignore_errors = 1; - testutil_assert(cursor->get_value(cursor, &value) != 0); + if (vstring) + testutil_assert( + cursor->get_value(cursor, &vs) != 0); + else + testutil_assert( + cursor->get_value(cursor, &vu) != 0); testutil_assert(ignore_errors == 0); break; + case MODIFY: case RESERVE: case SEARCH: case SEARCH_NEAR: case UPDATE: /* - * Reserve, search, search-near and update position the - * cursor and have both a key 
and value. + * Modify, reserve, search, search-near and update all + * position the cursor and have both a key and value. * * Any key/value should not reference application * memory. @@ -225,9 +266,19 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) testutil_assert(key != keybuf); testutil_assert(strcmp(key, KEY) == 0); } - testutil_assert(cursor->get_value(cursor, &value) == 0); - testutil_assert(value != valuebuf); - testutil_assert(strcmp(value, VALUE) == 0); + if (vstring) { + testutil_assert( + cursor->get_value(cursor, &vs) == 0); + testutil_assert(vs != valuebuf); + testutil_assert(strcmp(vs, VALUE) == 0); + } else { + testutil_assert( + cursor->get_value(cursor, &vu) == 0); + testutil_assert(vu.data != valuebuf); + testutil_assert(vu.size == strlen(VALUE)); + testutil_assert( + memcmp(vu.data, VALUE, strlen(VALUE)) == 0); + } break; } @@ -239,9 +290,16 @@ cursor_scope_ops(WT_SESSION *session, const char *uri) if (recno) cursor->set_key(cursor, (uint64_t)1); else { - cursor->set_key(cursor, KEY); + strcpy(keybuf, KEY); + cursor->set_key(cursor, keybuf); + } + strcpy(valuebuf, VALUE); + if (vstring) + cursor->set_value(cursor, valuebuf); + else { + vu.size = strlen(vu.data = valuebuf); + cursor->set_value(cursor, &vu); } - cursor->set_value(cursor, VALUE); testutil_check(cursor->insert(cursor)); } } @@ -272,11 +330,19 @@ main(int argc, char *argv[]) wiredtiger_open(opts->home, &event_handler, "create", &opts->conn)); run(opts->conn, "file:file.SS", "key_format=S,value_format=S"); + run(opts->conn, "file:file.Su", "key_format=S,value_format=u"); run(opts->conn, "file:file.rS", "key_format=r,value_format=S"); + run(opts->conn, "file:file.ru", "key_format=r,value_format=u"); + run(opts->conn, "lsm:lsm.SS", "key_format=S,value_format=S"); + run(opts->conn, "lsm:lsm.Su", "key_format=S,value_format=S"); run(opts->conn, "lsm:lsm.rS", "key_format=r,value_format=S"); + run(opts->conn, "lsm:lsm.ru", "key_format=r,value_format=S"); + run(opts->conn, 
"table:table.SS", "key_format=S,value_format=S"); + run(opts->conn, "table:table.Su", "key_format=S,value_format=u"); run(opts->conn, "table:table.rS", "key_format=r,value_format=S"); + run(opts->conn, "table:table.ru", "key_format=r,value_format=u"); testutil_cleanup(opts); diff --git a/test/format/config.c b/test/format/config.c index ce1dc6d6e8e..2685438af00 100644 --- a/test/format/config.c +++ b/test/format/config.c @@ -489,6 +489,8 @@ config_pct(void) #define CONFIG_DELETE_ENTRY 0 { "delete_pct", &g.c_delete_pct, 0 }, { "insert_pct", &g.c_insert_pct, 0 }, +#define CONFIG_MODIFY_ENTRY 2 + { "modify_pct", &g.c_modify_pct, 0 }, { "read_pct", &g.c_read_pct, 0 }, { "write_pct", &g.c_write_pct, 0 }, }; @@ -508,6 +510,16 @@ config_pct(void) testutil_die(EINVAL, "operation percentages total to more than 100%%"); + /* Cursor modify isn't possible for fixed-length column store. */ + if (g.type == FIX) { + if (config_is_perm("modify_pct")) + testutil_die(EINVAL, + "WT_CURSOR.modify not supported by fixed-length " + "column store or LSM"); + list[CONFIG_MODIFY_ENTRY].order = 0; + *list[CONFIG_MODIFY_ENTRY].vp = 0; + } + /* * If the delete percentage isn't nailed down, periodically set it to * 0 so salvage gets run. 
Don't do it on the first run, all our smoke @@ -547,8 +559,9 @@ config_pct(void) list[max_slot].order = 0; pct -= *list[max_slot].vp; } - testutil_assert(g.c_delete_pct + - g.c_insert_pct + g.c_read_pct + g.c_write_pct == 100); + + testutil_assert(g.c_delete_pct + g.c_insert_pct + + g.c_modify_pct + g.c_read_pct + g.c_write_pct == 100); } /* diff --git a/test/format/config.h b/test/format/config.h index bc809a764ce..3a41411e104 100644 --- a/test/format/config.h +++ b/test/format/config.h @@ -238,6 +238,10 @@ static CONFIG c[] = { "configure for mmap operations", /* 90% */ C_BOOL, 90, 0, 0, &g.c_mmap, NULL }, + { "modify_pct", + "percent operations that are value modifications", + C_IGNORE, 0, 0, 100, &g.c_modify_pct, NULL }, + { "ops", "the number of modification operations done per run", 0x0, 0, M(2), M(100), &g.c_ops, NULL }, @@ -323,7 +327,7 @@ static CONFIG c[] = { C_IGNORE|C_STRING, 0, 0, 0, NULL, &g.c_config_open }, { "write_pct", - "percent operations that are writes", + "percent operations that are value updates", C_IGNORE, 0, 0, 100, &g.c_write_pct, NULL }, { NULL, NULL, 0x0, 0, 0, 0, NULL, NULL } diff --git a/test/format/format.h b/test/format/format.h index fa898e439be..104ee1553f4 100644 --- a/test/format/format.h +++ b/test/format/format.h @@ -78,6 +78,8 @@ #define FORMAT_OPERATION_REPS 3 /* 3 thread operations sets */ +#define MAX_MODIFY_ENTRIES 5 /* maximum change vectors */ + typedef struct { char *home; /* Home directory */ char *home_backup; /* Hot-backup directory */ @@ -147,28 +149,28 @@ typedef struct { uint32_t c_bloom_hash_count; uint32_t c_bloom_oldest; uint32_t c_cache; - uint32_t c_compact; uint32_t c_checkpoints; - char *c_checksum; + char *c_checksum; uint32_t c_chunk_size; - char *c_compression; - char *c_encryption; - char *c_config_open; + uint32_t c_compact; + char *c_compression; + char *c_config_open; uint32_t c_data_extend; - char *c_data_source; + char *c_data_source; uint32_t c_delete_pct; uint32_t c_dictionary; uint32_t 
c_direct_io; + char *c_encryption; uint32_t c_evict_max; + char *c_file_type; uint32_t c_firstfit; - char *c_file_type; uint32_t c_huffman_key; uint32_t c_huffman_value; uint32_t c_in_memory; uint32_t c_insert_pct; uint32_t c_internal_key_truncation; uint32_t c_intl_page_max; - char *c_isolation; + char *c_isolation; uint32_t c_key_gap; uint32_t c_key_max; uint32_t c_key_min; @@ -176,22 +178,23 @@ typedef struct { uint32_t c_leak_memory; uint32_t c_logging; uint32_t c_logging_archive; - char *c_logging_compression; + char *c_logging_compression; uint32_t c_logging_prealloc; uint32_t c_long_running_txn; uint32_t c_lsm_worker_threads; uint32_t c_merge_max; uint32_t c_mmap; + uint32_t c_modify_pct; uint32_t c_ops; - uint32_t c_quiet; uint32_t c_prefix_compression; uint32_t c_prefix_compression_min; + uint32_t c_quiet; + uint32_t c_read_pct; + uint32_t c_rebalance; uint32_t c_repeat_data_pct; uint32_t c_reverse; uint32_t c_rows; uint32_t c_runs; - uint32_t c_read_pct; - uint32_t c_rebalance; uint32_t c_salvage; uint32_t c_split_pct; uint32_t c_statistics; diff --git a/test/format/ops.c b/test/format/ops.c index 6e3e3b783c5..02cce77eec2 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -29,6 +29,8 @@ #include "format.h" static int col_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); +static int col_modify( + TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool); static int col_reserve(WT_CURSOR *, uint64_t, bool); static int col_update( @@ -37,6 +39,8 @@ static int nextprev(WT_CURSOR *, int); static void *ops(void *); static int row_insert( TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); +static int row_modify( + TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool); static int row_reserve(WT_CURSOR *, WT_ITEM *, uint64_t, bool); static int row_update( @@ -403,7 +407,7 @@ snap_check(WT_CURSOR 
*cursor, static void * ops(void *arg) { - enum { INSERT, READ, REMOVE, UPDATE } op; + enum { INSERT, MODIFY, READ, REMOVE, UPDATE } op; SNAP_OPS *snap, snap_list[64]; TINFO *tinfo; WT_CONNECTION *conn; @@ -610,11 +614,12 @@ skip_checkpoint: /* Pick the next checkpoint operation. */ op = REMOVE; else if (i < g.c_delete_pct + g.c_insert_pct) op = INSERT; - else if (i < - g.c_delete_pct + g.c_insert_pct + g.c_write_pct) + else if (i < g.c_delete_pct + + g.c_insert_pct + g.c_modify_pct) + op = MODIFY; + else if (i < g.c_delete_pct + + g.c_insert_pct + g.c_modify_pct + g.c_write_pct) op = UPDATE; - else - op = READ; } /* @@ -698,6 +703,30 @@ skip_checkpoint: /* Pick the next checkpoint operation. */ testutil_assert(ret == 0 || ret == WT_ROLLBACK); } break; + case MODIFY: + ++tinfo->update; + switch (g.type) { + case ROW: + ret = row_modify(tinfo, cursor, + key, value, keyno, positioned); + break; + case VAR: + ret = col_modify(tinfo, cursor, + key, value, keyno, positioned); + break; + } + if (ret == 0) { + positioned = true; + if (SNAP_TRACK) + snap_track(snap++, keyno, NULL, value); + } else { + positioned = false; + if (ret == WT_ROLLBACK && intxn) + goto deadlock; + testutil_assert(ret == 0 || + ret == WT_NOTFOUND || ret == WT_ROLLBACK); + } + break; case READ: ++tinfo->search; ret = read_row(cursor, key, value, keyno); @@ -742,17 +771,15 @@ skip_checkpoint: /* Pick the next checkpoint operation. */ case UPDATE: update_instead_of_insert: ++tinfo->update; - - /* Update the row. 
*/ switch (g.type) { case ROW: - ret = row_update(tinfo, - cursor, key, value, keyno, positioned); + ret = row_update(tinfo, cursor, + key, value, keyno, positioned); break; case FIX: case VAR: - ret = col_update(tinfo, - cursor, key, value, keyno, positioned); + ret = col_update(tinfo, cursor, + key, value, keyno, positioned); break; } if (ret == 0) { @@ -1168,6 +1195,235 @@ col_reserve(WT_CURSOR *cursor, uint64_t keyno, bool positioned) return (0); } +/* + * modify_build -- + * Generate a set of modify vectors, and copy what the final result + * should be into the value buffer. + */ +static bool +modify_build(TINFO *tinfo, + WT_CURSOR *cursor, WT_MODIFY *entries, int *nentriesp, WT_ITEM *value) +{ + static char repl[64]; + size_t len, size; + u_int i, nentries; + WT_ITEM *ta, _ta, *tb, _tb, *tmp; + + if (repl[0] == '\0') + memset(repl, '+', sizeof(repl)); + + ta = &_ta; + memset(ta, 0, sizeof(*ta)); + tb = &_tb; + memset(tb, 0, sizeof(*tb)); + + testutil_check(cursor->get_value(cursor, value)); + + /* + * Randomly select a number of byte changes, offsets and lengths. Start + * at least 11 bytes in so we skip the leading key information. + */ + nentries = mmrand(&tinfo->rnd, 1, MAX_MODIFY_ENTRIES); + for (i = 0; i < nentries; ++i) { + entries[i].data.data = repl; + entries[i].data.size = (size_t)mmrand(&tinfo->rnd, 0, 10); + entries[i].offset = (size_t)mmrand(&tinfo->rnd, 20, 40); + entries[i].size = (size_t)mmrand(&tinfo->rnd, 0, 10); + } + + /* + * Process the entries to figure out how large a buffer we need. This is + * a bit pessimistic because we're ignoring replacement bytes, but it's + * a simpler calculation. + */ + for (size = cursor->value.size, i = 0; i < nentries; ++i) { + if (entries[i].offset >= size) + size = entries[i].offset; + size += entries[i].data.size; + } + + /* If size is larger than the available buffer size, skip this one. */ + if (size >= value->memsize) + return (false); + + /* Allocate a pair of buffers. 
*/ + ta->mem = dcalloc(size, sizeof(uint8_t)); + tb->mem = dcalloc(size, sizeof(uint8_t)); + + /* + * Use a brute-force process to create the value WiredTiger will create + * from this change vector. Don't do anything tricky to speed it up, we + * want to use a different algorithm from WiredTiger's, the idea is to + * bug-check the library. + */ + memcpy(ta->mem, value->data, value->size); + ta->size = value->size; + for (i = 0; i < nentries; ++i) { + /* Take leading bytes from the original, plus any gap bytes. */ + if (entries[i].offset >= ta->size) { + memcpy(tb->mem, ta->mem, ta->size); + if (entries[i].offset > ta->size) + memset((uint8_t *)tb->mem + ta->size, + '\0', entries[i].offset - ta->size); + } else + if (entries[i].offset > 0) + memcpy(tb->mem, ta->mem, entries[i].offset); + tb->size = entries[i].offset; + + /* Take replacement bytes. */ + if (entries[i].data.size > 0) { + memcpy((uint8_t *)tb->mem + tb->size, + entries[i].data.data, entries[i].data.size); + tb->size += entries[i].data.size; + } + + /* Take trailing bytes from the original. */ + len = entries[i].offset + entries[i].size; + if (ta->size > len) { + memcpy((uint8_t *)tb->mem + tb->size, + (uint8_t *)ta->mem + len, ta->size - len); + tb->size += ta->size - len; + } + testutil_assert(tb->size <= size); + + tmp = ta; + ta = tb; + tb = tmp; + } + + /* Copy the expected result into the value structure. */ + memcpy(value->mem, ta->mem, ta->size); + value->data = value->mem; + value->size = ta->size; + + free(ta->mem); + free(tb->mem); + + *nentriesp = (int)nentries; + return (true); +} + +/* + * row_modify -- + * Modify a row in a row-store file. 
+ */ +static int +row_modify(TINFO *tinfo, WT_CURSOR *cursor, + WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) +{ + WT_DECL_RET; + WT_MODIFY entries[MAX_MODIFY_ENTRIES]; + int nentries; + + if (!positioned) { + key_gen(key, keyno); + cursor->set_key(cursor, key); + switch (ret = cursor->search(cursor)) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: + return (WT_ROLLBACK); + case WT_NOTFOUND: + return (WT_NOTFOUND); + default: + testutil_die(ret, + "row_modify: read row %" PRIu64 " by key", keyno); + } + } + + /* + * Generate a set of change vectors and copy the expected result into + * the value buffer. If the return value is non-zero, there wasn't a + * big enough value to work with, or for some reason we couldn't build + * a reasonable change vector. + */ + ret = WT_NOTFOUND; + if (modify_build(tinfo, cursor, entries, &nentries, value)) + ret = cursor->modify(cursor, entries, nentries); + switch (ret) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: + return (WT_ROLLBACK); + case WT_NOTFOUND: + return (WT_NOTFOUND); + default: + testutil_die(ret, + "row_modify: modify row %" PRIu64 " by key", keyno); + } + +#ifdef HAVE_BERKELEY_DB + if (!SINGLETHREADED) + return (0); + + bdb_update(key->data, key->size, value->data, value->size); +#endif + return (0); +} + +/* + * col_modify -- + * Modify a row in a column-store file. + */ +static int +col_modify(TINFO *tinfo, WT_CURSOR *cursor, + WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) +{ + WT_DECL_RET; + WT_MODIFY entries[MAX_MODIFY_ENTRIES]; + int nentries; + + if (!positioned) { + cursor->set_key(cursor, keyno); + switch (ret = cursor->search(cursor)) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: + return (WT_ROLLBACK); + case WT_NOTFOUND: + return (WT_NOTFOUND); + default: + testutil_die(ret, + "col_modify: read row %" PRIu64, keyno); + } + } + + /* + * Generate a set of change vectors and copy the expected result into + * the value buffer. 
If the return value is non-zero, there wasn't a + * big enough value to work with, or for some reason we couldn't build + * a reasonable change vector. + */ + ret = WT_NOTFOUND; + if (modify_build(tinfo, cursor, entries, &nentries, value)) + ret = cursor->modify(cursor, entries, nentries); + switch (ret) { + case 0: + break; + case WT_CACHE_FULL: + case WT_ROLLBACK: + return (WT_ROLLBACK); + case WT_NOTFOUND: + return (WT_NOTFOUND); + default: + testutil_die(ret, "col_modify: modify row %" PRIu64, keyno); + } + +#ifdef HAVE_BERKELEY_DB + if (!SINGLETHREADED) + return (0); + + key_gen(key, keyno); + bdb_update(key->data, key->size, value->data, value->size); +#else + (void)key; /* [-Wunused-variable] */ +#endif + return (0); +} + /* * row_update -- * Update a row in a row-store file. diff --git a/test/suite/test_cursor12.py b/test/suite/test_cursor12.py new file mode 100644 index 00000000000..827f37cfcef --- /dev/null +++ b/test/suite/test_cursor12.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest +from wtscenario import make_scenarios + +# test_cursor12.py +# Test cursor modify call +class test_cursor12(wttest.WiredTigerTestCase): + types = [ + ('file', dict(uri='file:modify')), + ('lsm', dict(uri='lsm:modify')), + ('table', dict(uri='table:modify')), + ] + scenarios = make_scenarios(types) + + # Smoke-test the modify API. + def test_modify_smoke(self): + # List with original value, final value, and modifications to get + # there. + list = [ + { + 'o' : 'ABCDEFGH', # no operation + 'f' : 'ABCDEFGH', + 'mods' : [['', 0, 0]] + },{ + 'o' : 'ABCDEFGH', # no operation with offset + 'f' : 'ABCDEFGH', + 'mods' : [['', 4, 0]] + },{ + 'o' : 'ABCDEFGH', # rewrite beginning + 'f' : '--CDEFGH', + 'mods' : [['--', 0, 2]] + },{ + 'o' : 'ABCDEFGH', # rewrite end + 'f' : 'ABCDEF--', + 'mods' : [['--', 6, 2]] + },{ + 'o' : 'ABCDEFGH', # append + 'f' : 'ABCDEFGH--', + 'mods' : [['--', 8, 2]] + },{ + 'o' : 'ABCDEFGH', # append with gap + 'f' : 'ABCDEFGH\00\00--', + 'mods' : [['--', 10, 2]] + },{ + 'o' : 'ABCDEFGH', # multiple replacements + 'f' : 'A-C-E-G-', + 'mods' : [['-', 1, 1], ['-', 3, 1], ['-', 5, 1], ['-', 7, 1]] + },{ + 'o' : 'ABCDEFGH', # multiple overlapping replacements + 'f' : 'A-CDEFGH', + 'mods' : [['+', 1, 1], ['+', 1, 1], ['+', 1, 1], ['-', 1, 1]] + },{ + 'o' : 'ABCDEFGH', # multiple overlapping gap replacements + 'f' : 'ABCDEFGH\00\00--', + 'mods' : [['+', 10, 1], ['+', 10, 1], ['+', 10, 1], ['--', 10, 2]] + },{ + 'o' : 'ABCDEFGH', # shrink beginning + 'f' : '--EFGH', + 
'mods' : [['--', 0, 4]] + },{ + 'o' : 'ABCDEFGH', # shrink middle + 'f' : 'AB--GH', + 'mods' : [['--', 2, 4]] + },{ + 'o' : 'ABCDEFGH', # shrink end + 'f' : 'ABCD--', + 'mods' : [['--', 4, 4]] + },{ + 'o' : 'ABCDEFGH', # grow beginning + 'f' : '--ABCDEFGH', + 'mods' : [['--', 0, 0]] + },{ + 'o' : 'ABCDEFGH', # grow middle + 'f' : 'ABCD--EFGH', + 'mods' : [['--', 4, 0]] + },{ + 'o' : 'ABCDEFGH', # grow end + 'f' : 'ABCDEFGH--', + 'mods' : [['--', 8, 0]] + },{ + 'o' : 'ABCDEFGH', # discard beginning + 'f' : 'EFGH', + 'mods' : [['', 0, 4]] + },{ + 'o' : 'ABCDEFGH', # discard middle + 'f' : 'ABGH', + 'mods' : [['', 2, 4]] + },{ + 'o' : 'ABCDEFGH', # discard end + 'f' : 'ABCD', + 'mods' : [['', 4, 4]] + },{ + 'o' : 'ABCDEFGH', # overlap the end and append + 'f' : 'ABCDEF--XX', + 'mods' : [['--XX', 6, 2]] + },{ + 'o' : 'ABCDEFGH', # overlap the end with incorrect size + 'f' : 'ABCDEFG01234567', + 'mods' : [['01234567', 7, 2000]] + } + ] + + self.session.create(self.uri, 'key_format=S,value_format=u') + cursor = self.session.open_cursor(self.uri, None, None) + + # For each test in the list, set the original value, apply modifications + # in order, then confirm the final state. + for i in list: + cursor['ABC'] = i['o'] + + mods = [] + for j in i['mods']: + mod = wiredtiger.Modify(j[0], j[1], j[2]) + mods.append(mod) + + cursor.set_key('ABC') + cursor.modify(mods) + self.assertEquals(str(cursor['ABC']), i['f']) + + # Check that modify returns not-found after a delete. 
+ def test_modify_delete(self): + self.session.create(self.uri, 'key_format=S,value_format=u') + cursor = self.session.open_cursor(self.uri, None, None) + cursor['ABC'] = 'ABCDEFGH' + cursor.set_key('ABC') + cursor.remove() + + mods = [] + mod = wiredtiger.Modify('ABCD', 3, 3) + mods.append(mod) + + cursor.set_key('ABC') + #self.assertEqual(cursor.modify(mods), wiredtiger.WT_NOTFOUND) + self.assertRaises( + wiredtiger.WiredTigerError, lambda:cursor.modify(mods)) + +if __name__ == '__main__': + wttest.run() -- cgit v1.2.1 From db14d312f68769f358662f5ea7aa74d61b9cd35d Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Thu, 25 May 2017 14:48:26 -0400 Subject: WT-3333 Fixes for zero length strings packed/unpacked in a 'u' format via Python. (#3432) --- lang/python/wiredtiger/packing.py | 5 +++- test/java/com/wiredtiger/test/PackTest.java | 41 +++++++++++++++++++++++++++++ test/suite/test_pack.py | 5 ++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/lang/python/wiredtiger/packing.py b/lang/python/wiredtiger/packing.py index 0506f2afda9..fb674538b76 100644 --- a/lang/python/wiredtiger/packing.py +++ b/lang/python/wiredtiger/packing.py @@ -94,6 +94,9 @@ def unpack(fmt, s): elif f == 'S': size = s.find('\0') elif f == 'u' and offset == len(fmt) - 1: + # A WT_ITEM with a NULL data field will be appear as None. + if s == None: + s = '' size = len(s) else: # Note: 'U' is used internally, and may be exposed to us. 
@@ -169,7 +172,7 @@ def pack(fmt, *values): result += val[:l] if f == 'S' and not havesize: result += '\0' - elif size > l: + elif size > l and havesize: result += '\0' * (size - l) elif f in 't': # bit type, size is number of bits diff --git a/test/java/com/wiredtiger/test/PackTest.java b/test/java/com/wiredtiger/test/PackTest.java index a162fdfd0f0..302313169cd 100644 --- a/test/java/com/wiredtiger/test/PackTest.java +++ b/test/java/com/wiredtiger/test/PackTest.java @@ -183,6 +183,47 @@ public class PackTest { Assert.assertEquals(unpacker.getString(), "Hell"); } + @Test + public void pack08() + throws WiredTigerPackingException { + String format = "u"; + PackOutputStream packer = new PackOutputStream(format); + PackInputStream unpacker; + byte[] b0 = {}; + byte[] b1 = { 0x00 }; + byte[] packed; + + packer.addByteArray(b0); + packed = packer.getValue(); + unpacker = new PackInputStream(format, packed); + Assert.assertTrue(java.util.Arrays.equals( + unpacker.getByteArray(), b0)); + + packer = new PackOutputStream(format); + packer.addByteArray(b1); + packed = packer.getValue(); + unpacker = new PackInputStream(format, packed); + Assert.assertTrue(java.util.Arrays.equals( + unpacker.getByteArray(), b1)); + + format = "uu"; + for (int i = 0; i < 2; i++) { + byte[] arg0 = (i == 0 ? b0 : b1); + for (int j = 0; j < 2; j++) { + byte[] arg1 = (j == 0 ? 
b0 : b1); + packer = new PackOutputStream(format); + packer.addByteArray(arg0); + packer.addByteArray(arg1); + packed = packer.getValue(); + unpacker = new PackInputStream(format, packed); + Assert.assertTrue(java.util.Arrays.equals( + unpacker.getByteArray(), arg0)); + Assert.assertTrue(java.util.Arrays.equals( + unpacker.getByteArray(), arg1)); + } + } + } + @Test public void packUnpackNumber01() throws WiredTigerPackingException { diff --git a/test/suite/test_pack.py b/test/suite/test_pack.py index 951c0b0da20..a24ef4fdfe1 100644 --- a/test/suite/test_pack.py +++ b/test/suite/test_pack.py @@ -95,6 +95,11 @@ class test_pack(wttest.WiredTigerTestCase): self.check('3u', r"\x4") self.check('3uu', r"\x4", r"\x42" * 10) self.check('u3u', r"\x42" * 10, r"\x4") + self.check('u', '\x00') + self.check('u', '') + self.check('uu', '', '\x00') + self.check('uu', '\x00', '') + self.check('uu', '', '') self.check('s', "4") self.check("1s", "4") -- cgit v1.2.1 From 4ac341a6860dfc45803b18721250bffd022c8387 Mon Sep 17 00:00:00 2001 From: David Hows Date: Fri, 26 May 2017 12:29:42 +1000 Subject: =?UTF-8?q?WT-3258=20Add=20timers=20tracking=20time=20spent=20on?= =?UTF-8?q?=20failed=20evictions=20of=20large=20p=E2=80=A6=20(#3428)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dist/stat_data.py | 9 +- src/evict/evict_page.c | 29 +++- src/include/stat.h | 3 + src/include/wiredtiger.in | 412 ++++++++++++++++++++++++---------------------- src/support/stat.c | 18 +- 5 files changed, 262 insertions(+), 209 deletions(-) diff --git a/dist/stat_data.py b/dist/stat_data.py index 203a88fb055..acc156b947e 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -204,9 +204,12 @@ connection_stats = [ CacheStat('cache_eviction_dirty', 'modified pages evicted'), CacheStat('cache_eviction_empty_score', 'eviction empty score', 'no_clear,no_scale'), CacheStat('cache_eviction_fail', 'pages selected for eviction unable to be evicted'), - 
CacheStat('cache_eviction_force', 'pages evicted because they exceeded the in-memory maximum'), - CacheStat('cache_eviction_force_delete', 'pages evicted because they had chains of deleted items'), - CacheStat('cache_eviction_force_fail', 'failed eviction of pages that exceeded the in-memory maximum'), + CacheStat('cache_eviction_force', 'pages evicted because they exceeded the in-memory maximum count'), + CacheStat('cache_eviction_force_time', 'pages evicted because they exceeded the in-memory maximum time (usecs)'), + CacheStat('cache_eviction_force_delete', 'pages evicted because they had chains of deleted items count'), + CacheStat('cache_eviction_force_delete_time', 'pages evicted because they had chains of deleted items time (usecs)'), + CacheStat('cache_eviction_force_fail', 'failed eviction of pages that exceeded the in-memory maximum count'), + CacheStat('cache_eviction_force_fail_time', 'failed eviction of pages that exceeded the in-memory maximum time (usecs)'), CacheStat('cache_eviction_force_retune', 'force re-tuning of eviction workers once in a while'), CacheStat('cache_eviction_get_ref', 'eviction calls to get a page'), CacheStat('cache_eviction_get_ref_empty', 'eviction calls to get a page found queue empty'), diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index 80aba818153..01818f106fc 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -55,10 +55,12 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) WT_BTREE *btree; WT_DECL_RET; WT_PAGE *page; + struct timespec start, stop; bool locked, too_big; btree = S2BT(session); page = ref->page; + __wt_epoch(session, &start); /* * Take some care with order of operations: if we release the hazard @@ -75,19 +77,34 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) (void)__wt_atomic_addv32(&btree->evict_busy, 1); too_big = page->memory_footprint >= btree->splitmempage; - if ((ret = __wt_evict(session, ref, false)) == 0) { - if (too_big) + + /* + * 
Track how long the call to evict took. If eviction is successful then + * we have one of two pairs of stats to increment. + */ + ret = __wt_evict(session, ref, false); + __wt_epoch(session, &stop); + if (ret == 0) { + if (too_big) { WT_STAT_CONN_INCR(session, cache_eviction_force); - else + WT_STAT_CONN_INCRV(session, cache_eviction_force_time, + WT_TIMEDIFF_US(stop, start)); + } else { /* * If the page isn't too big, we are evicting it because * it had a chain of deleted entries that make traversal * expensive. */ - WT_STAT_CONN_INCR( - session, cache_eviction_force_delete); - } else + WT_STAT_CONN_INCR(session, cache_eviction_force_delete); + WT_STAT_CONN_INCRV(session, + cache_eviction_force_delete_time, + WT_TIMEDIFF_US(stop, start)); + } + } else { WT_STAT_CONN_INCR(session, cache_eviction_force_fail); + WT_STAT_CONN_INCRV(session, cache_eviction_force_fail_time, + WT_TIMEDIFF_US(stop, start)); + } (void)__wt_atomic_subv32(&btree->evict_busy, 1); diff --git a/src/include/stat.h b/src/include/stat.h index fa62cf27693..7c2529f1746 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -316,6 +316,7 @@ struct __wt_connection_stats { int64_t cache_eviction_worker_removed; int64_t cache_eviction_stable_state_workers; int64_t cache_eviction_force_fail; + int64_t cache_eviction_force_fail_time; int64_t cache_eviction_walks_active; int64_t cache_eviction_walks_started; int64_t cache_eviction_force_retune; @@ -340,7 +341,9 @@ struct __wt_connection_stats { int64_t cache_write_lookaside; int64_t cache_pages_inuse; int64_t cache_eviction_force; + int64_t cache_eviction_force_time; int64_t cache_eviction_force_delete; + int64_t cache_eviction_force_delete_time; int64_t cache_eviction_app; int64_t cache_eviction_pages_queued; int64_t cache_eviction_pages_queued_urgent; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 5e76b2915b1..2bbe812d7f7 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -4560,414 +4560,432 @@ extern 
int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_EVICTION_WORKER_REMOVED 1056 /*! cache: eviction worker thread stable number */ #define WT_STAT_CONN_CACHE_EVICTION_STABLE_STATE_WORKERS 1057 -/*! cache: failed eviction of pages that exceeded the in-memory maximum */ +/*! + * cache: failed eviction of pages that exceeded the in-memory maximum + * count + */ #define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1058 +/*! + * cache: failed eviction of pages that exceeded the in-memory maximum + * time (usecs) + */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL_TIME 1059 /*! cache: files with active eviction walks */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1059 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1060 /*! cache: files with new eviction walks started */ -#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1060 +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1061 /*! cache: force re-tuning of eviction workers once in a while */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_RETUNE 1061 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_RETUNE 1062 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1062 +#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1063 /*! cache: hazard pointer check calls */ -#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1063 +#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1064 /*! cache: hazard pointer check entries walked */ -#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1064 +#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1065 /*! cache: hazard pointer maximum array length */ -#define WT_STAT_CONN_CACHE_HAZARD_MAX 1065 +#define WT_STAT_CONN_CACHE_HAZARD_MAX 1066 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1066 +#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1067 /*! cache: in-memory page splits */ -#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1067 +#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1068 /*! 
cache: internal pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1068 +#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1069 /*! cache: internal pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1069 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1070 /*! cache: leaf pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1070 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1071 /*! cache: lookaside table insert calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1071 +#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1072 /*! cache: lookaside table remove calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1072 +#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1073 /*! cache: maximum bytes configured */ -#define WT_STAT_CONN_CACHE_BYTES_MAX 1073 +#define WT_STAT_CONN_CACHE_BYTES_MAX 1074 /*! cache: maximum page size at eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1074 +#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1075 /*! cache: modified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1075 +#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1076 /*! cache: modified pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1076 +#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1077 /*! cache: overflow pages read into cache */ -#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1077 +#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1078 /*! cache: overflow values cached in memory */ -#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1078 +#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1079 /*! cache: page split during eviction deepened the tree */ -#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1079 +#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1080 /*! cache: page written requiring lookaside records */ -#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1080 +#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1081 /*! 
cache: pages currently held in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_INUSE 1081 -/*! cache: pages evicted because they exceeded the in-memory maximum */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1082 -/*! cache: pages evicted because they had chains of deleted items */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1083 +#define WT_STAT_CONN_CACHE_PAGES_INUSE 1082 +/*! cache: pages evicted because they exceeded the in-memory maximum count */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1083 +/*! + * cache: pages evicted because they exceeded the in-memory maximum time + * (usecs) + */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_TIME 1084 +/*! cache: pages evicted because they had chains of deleted items count */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1085 +/*! + * cache: pages evicted because they had chains of deleted items time + * (usecs) + */ +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE_TIME 1086 /*! cache: pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP 1084 +#define WT_STAT_CONN_CACHE_EVICTION_APP 1087 /*! cache: pages queued for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1085 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1088 /*! cache: pages queued for urgent eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1086 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1089 /*! cache: pages queued for urgent eviction during walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1087 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1090 /*! cache: pages read into cache */ -#define WT_STAT_CONN_CACHE_READ 1088 +#define WT_STAT_CONN_CACHE_READ 1091 /*! cache: pages read into cache requiring lookaside entries */ -#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1089 +#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1092 /*! 
cache: pages requested from the cache */ -#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1090 +#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1093 /*! cache: pages seen by eviction walk */ -#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1091 +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1094 /*! cache: pages selected for eviction unable to be evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1092 +#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1095 /*! cache: pages walked for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK 1093 +#define WT_STAT_CONN_CACHE_EVICTION_WALK 1096 /*! cache: pages written from cache */ -#define WT_STAT_CONN_CACHE_WRITE 1094 +#define WT_STAT_CONN_CACHE_WRITE 1097 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1095 +#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1098 /*! cache: percentage overhead */ -#define WT_STAT_CONN_CACHE_OVERHEAD 1096 +#define WT_STAT_CONN_CACHE_OVERHEAD 1099 /*! cache: tracked bytes belonging to internal pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1097 +#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1100 /*! cache: tracked bytes belonging to leaf pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_LEAF 1098 +#define WT_STAT_CONN_CACHE_BYTES_LEAF 1101 /*! cache: tracked dirty bytes in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1099 +#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1102 /*! cache: tracked dirty pages in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1100 +#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1103 /*! cache: unmodified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1101 +#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1104 /*! connection: auto adjusting condition resets */ -#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1102 +#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1105 /*! connection: auto adjusting condition wait calls */ -#define WT_STAT_CONN_COND_AUTO_WAIT 1103 +#define WT_STAT_CONN_COND_AUTO_WAIT 1106 /*! 
connection: detected system time went backwards */ -#define WT_STAT_CONN_TIME_TRAVEL 1104 +#define WT_STAT_CONN_TIME_TRAVEL 1107 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1105 +#define WT_STAT_CONN_FILE_OPEN 1108 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1106 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1109 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1107 +#define WT_STAT_CONN_MEMORY_FREE 1110 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1108 +#define WT_STAT_CONN_MEMORY_GROW 1111 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1109 +#define WT_STAT_CONN_COND_WAIT 1112 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1110 +#define WT_STAT_CONN_RWLOCK_READ 1113 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1111 +#define WT_STAT_CONN_RWLOCK_WRITE 1114 /*! connection: total fsync I/Os */ -#define WT_STAT_CONN_FSYNC_IO 1112 +#define WT_STAT_CONN_FSYNC_IO 1115 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1113 +#define WT_STAT_CONN_READ_IO 1116 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1114 +#define WT_STAT_CONN_WRITE_IO 1117 /*! cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1115 +#define WT_STAT_CONN_CURSOR_CREATE 1118 /*! cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1116 +#define WT_STAT_CONN_CURSOR_INSERT 1119 /*! cursor: cursor modify calls */ -#define WT_STAT_CONN_CURSOR_MODIFY 1117 +#define WT_STAT_CONN_CURSOR_MODIFY 1120 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1118 +#define WT_STAT_CONN_CURSOR_NEXT 1121 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1119 +#define WT_STAT_CONN_CURSOR_PREV 1122 /*! 
cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1120 +#define WT_STAT_CONN_CURSOR_REMOVE 1123 /*! cursor: cursor reserve calls */ -#define WT_STAT_CONN_CURSOR_RESERVE 1121 +#define WT_STAT_CONN_CURSOR_RESERVE 1124 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1122 +#define WT_STAT_CONN_CURSOR_RESET 1125 /*! cursor: cursor restarted searches */ -#define WT_STAT_CONN_CURSOR_RESTART 1123 +#define WT_STAT_CONN_CURSOR_RESTART 1126 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1124 +#define WT_STAT_CONN_CURSOR_SEARCH 1127 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1125 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1128 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1126 +#define WT_STAT_CONN_CURSOR_UPDATE 1129 /*! cursor: truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1127 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1130 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1128 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1131 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1129 +#define WT_STAT_CONN_DH_SWEEP_REF 1132 /*! data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1130 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1133 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1131 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1134 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1132 +#define WT_STAT_CONN_DH_SWEEP_TOD 1135 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1133 +#define WT_STAT_CONN_DH_SWEEPS 1136 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1134 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1137 /*! 
data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1135 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1138 /*! lock: checkpoint lock acquisitions */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1136 +#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1139 /*! lock: checkpoint lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1137 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1140 /*! lock: checkpoint lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1138 +#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1141 /*! lock: handle-list lock eviction thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1139 +#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1142 /*! lock: metadata lock acquisitions */ -#define WT_STAT_CONN_LOCK_METADATA_COUNT 1140 +#define WT_STAT_CONN_LOCK_METADATA_COUNT 1143 /*! lock: metadata lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1141 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1144 /*! lock: metadata lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1142 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1145 /*! lock: schema lock acquisitions */ -#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1143 +#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1146 /*! lock: schema lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1144 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1147 /*! lock: schema lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1145 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1148 /*! lock: table lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_COUNT 1146 +#define WT_STAT_CONN_LOCK_TABLE_COUNT 1149 /*! 
* lock: table lock application thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1147 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1150 /*! * lock: table lock internal thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1148 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1151 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1149 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1152 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1150 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1153 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1151 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1154 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1152 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1155 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1153 +#define WT_STAT_CONN_LOG_FLUSH 1156 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1154 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1157 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1155 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1158 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1156 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1159 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1157 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1160 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1158 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1161 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1159 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1162 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1160 +#define WT_STAT_CONN_LOG_SCANS 1163 /*! 
log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1161 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1164 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1162 +#define WT_STAT_CONN_LOG_WRITE_LSN 1165 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1163 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1166 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1164 +#define WT_STAT_CONN_LOG_SYNC 1167 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1165 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1168 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1166 +#define WT_STAT_CONN_LOG_SYNC_DIR 1169 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1167 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1170 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1168 +#define WT_STAT_CONN_LOG_WRITES 1171 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1169 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1172 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1170 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1173 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1171 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1174 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1172 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1175 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1173 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1176 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1174 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1177 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1175 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1178 /*! 
log: slot close lost race */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1176 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1179 /*! log: slot close unbuffered waits */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1177 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1180 /*! log: slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1178 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1181 /*! log: slot join atomic update races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1179 +#define WT_STAT_CONN_LOG_SLOT_RACES 1182 /*! log: slot join calls atomic updates raced */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1180 +#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1183 /*! log: slot join calls did not yield */ -#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1181 +#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1184 /*! log: slot join calls found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1182 +#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1185 /*! log: slot join calls slept */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1183 +#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1186 /*! log: slot join calls yielded */ -#define WT_STAT_CONN_LOG_SLOT_YIELD 1184 +#define WT_STAT_CONN_LOG_SLOT_YIELD 1187 /*! log: slot join found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1185 +#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1188 /*! log: slot joins yield time (usecs) */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1186 +#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1189 /*! log: slot transitions unable to find free slot */ -#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1187 +#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1190 /*! log: slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1188 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1191 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1189 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1192 /*! 
log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1190 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1193 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1191 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1194 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1192 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1195 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1193 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1196 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1194 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1197 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1195 +#define WT_STAT_CONN_REC_PAGES 1198 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1196 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1199 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1197 +#define WT_STAT_CONN_REC_PAGE_DELETE 1200 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1198 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1201 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1199 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1202 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1200 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1203 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1201 +#define WT_STAT_CONN_SESSION_OPEN 1204 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1202 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1205 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1203 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1206 /*! 
session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1204 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1207 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1205 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1208 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1206 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1209 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1207 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1210 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1208 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1211 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1209 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1212 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1210 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1213 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1211 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1214 /*! session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1212 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1215 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1213 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1216 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1214 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1217 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1215 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1218 /*! 
session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1216 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1219 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1217 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1220 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1218 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1221 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1219 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1222 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1220 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1223 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1221 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1224 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1222 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1225 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1223 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1226 /*! thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1224 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1227 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1225 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1228 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1226 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1229 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1227 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1230 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1228 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1231 /*! 
thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1229 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1232 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1230 +#define WT_STAT_CONN_PAGE_SLEEP 1233 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1231 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1234 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1232 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1235 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1233 +#define WT_STAT_CONN_TXN_BEGIN 1236 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1234 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1237 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1235 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1238 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1236 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1239 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1237 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1240 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1238 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1241 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1239 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1242 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1240 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1243 /*! 
transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1241 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1244 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1242 +#define WT_STAT_CONN_TXN_CHECKPOINT 1245 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1243 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1246 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1244 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1247 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1245 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1248 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1246 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1249 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1247 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1250 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1248 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1251 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1249 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1252 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1250 +#define WT_STAT_CONN_TXN_SYNC 1253 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1251 +#define WT_STAT_CONN_TXN_COMMIT 1254 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1252 +#define WT_STAT_CONN_TXN_ROLLBACK 1255 /*! 
* @} diff --git a/src/support/stat.c b/src/support/stat.c index bc40244f5e6..061615c0931 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -690,7 +690,8 @@ static const char * const __stats_connection_desc[] = { "cache: eviction worker thread evicting pages", "cache: eviction worker thread removed", "cache: eviction worker thread stable number", - "cache: failed eviction of pages that exceeded the in-memory maximum", + "cache: failed eviction of pages that exceeded the in-memory maximum count", + "cache: failed eviction of pages that exceeded the in-memory maximum time (usecs)", "cache: files with active eviction walks", "cache: files with new eviction walks started", "cache: force re-tuning of eviction workers once in a while", @@ -714,8 +715,10 @@ static const char * const __stats_connection_desc[] = { "cache: page split during eviction deepened the tree", "cache: page written requiring lookaside records", "cache: pages currently held in the cache", - "cache: pages evicted because they exceeded the in-memory maximum", - "cache: pages evicted because they had chains of deleted items", + "cache: pages evicted because they exceeded the in-memory maximum count", + "cache: pages evicted because they exceeded the in-memory maximum time (usecs)", + "cache: pages evicted because they had chains of deleted items count", + "cache: pages evicted because they had chains of deleted items time (usecs)", "cache: pages evicted by application threads", "cache: pages queued for eviction", "cache: pages queued for urgent eviction", @@ -986,6 +989,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_worker_removed = 0; /* not clearing cache_eviction_stable_state_workers */ stats->cache_eviction_force_fail = 0; + stats->cache_eviction_force_fail_time = 0; /* not clearing cache_eviction_walks_active */ stats->cache_eviction_walks_started = 0; stats->cache_eviction_force_retune = 0; @@ -1010,7 +1014,9 @@ 
__wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_write_lookaside = 0; /* not clearing cache_pages_inuse */ stats->cache_eviction_force = 0; + stats->cache_eviction_force_time = 0; stats->cache_eviction_force_delete = 0; + stats->cache_eviction_force_delete_time = 0; stats->cache_eviction_app = 0; stats->cache_eviction_pages_queued = 0; stats->cache_eviction_pages_queued_urgent = 0; @@ -1280,6 +1286,8 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, cache_eviction_stable_state_workers); to->cache_eviction_force_fail += WT_STAT_READ(from, cache_eviction_force_fail); + to->cache_eviction_force_fail_time += + WT_STAT_READ(from, cache_eviction_force_fail_time); to->cache_eviction_walks_active += WT_STAT_READ(from, cache_eviction_walks_active); to->cache_eviction_walks_started += @@ -1319,8 +1327,12 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, cache_write_lookaside); to->cache_pages_inuse += WT_STAT_READ(from, cache_pages_inuse); to->cache_eviction_force += WT_STAT_READ(from, cache_eviction_force); + to->cache_eviction_force_time += + WT_STAT_READ(from, cache_eviction_force_time); to->cache_eviction_force_delete += WT_STAT_READ(from, cache_eviction_force_delete); + to->cache_eviction_force_delete_time += + WT_STAT_READ(from, cache_eviction_force_delete_time); to->cache_eviction_app += WT_STAT_READ(from, cache_eviction_app); to->cache_eviction_pages_queued += WT_STAT_READ(from, cache_eviction_pages_queued); -- cgit v1.2.1 From 83864cd07f7a58e23b27c208fae47503fd3ed2b6 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 26 May 2017 10:48:38 -0400 Subject: WT-3343 WiredTiger database close can attempt unlock of a lock that's not held. (#3443) Closing the file handles that remain open at database close will attempt to unlock a lock that's not currently held. 
--- src/os_common/os_fhandle.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/os_common/os_fhandle.c b/src/os_common/os_fhandle.c index 69a01b41d14..0dcc6bc00ef 100644 --- a/src/os_common/os_fhandle.c +++ b/src/os_common/os_fhandle.c @@ -285,7 +285,7 @@ err: if (open_called) * Final close of a handle. */ static int -__handle_close(WT_SESSION_IMPL *session, WT_FH *fh) +__handle_close(WT_SESSION_IMPL *session, WT_FH *fh, bool locked) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -304,7 +304,8 @@ __handle_close(WT_SESSION_IMPL *session, WT_FH *fh) WT_FILE_HANDLE_REMOVE(conn, fh, bucket); (void)__wt_atomic_sub32(&conn->open_file_count, 1); - __wt_spin_unlock(session, &conn->fh_lock); + if (locked) + __wt_spin_unlock(session, &conn->fh_lock); /* Discard underlying resources. */ WT_TRET(fh->handle->close(fh->handle, (WT_SESSION *)session)); @@ -348,7 +349,7 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) return (0); } - return (__handle_close(session, fh)); + return (__handle_close(session, fh, true)); } /* @@ -362,7 +363,7 @@ __wt_close_connection_close(WT_SESSION_IMPL *session) WT_FH *fh, *fh_tmp; WT_TAILQ_SAFE_REMOVE_BEGIN(fh, &S2C(session)->fhqh, q, fh_tmp) { - WT_TRET(__handle_close(session, fh)); + WT_TRET(__handle_close(session, fh, false)); } WT_TAILQ_SAFE_REMOVE_END return (ret); } -- cgit v1.2.1 From 8376798db4c64af2a42a12dfdfc26b21c9fba84d Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Sat, 27 May 2017 04:29:11 +1000 Subject: Bump version string on develop after release. 
--- README | 6 +++--- RELEASE_INFO | 2 +- build_posix/aclocal/version-set.m4 | 4 ++-- build_posix/aclocal/version.m4 | 2 +- dist/package/wiredtiger.spec | 2 +- src/docs/upgrading.dox | 13 +++++++++++++ 6 files changed, 21 insertions(+), 8 deletions(-) diff --git a/README b/README index f7edae2835d..eb5324eb4d1 100644 --- a/README +++ b/README @@ -1,6 +1,6 @@ -WiredTiger 2.9.2: (December 23, 2016) +WiredTiger 2.9.3: (May 27, 2017) -This is version 2.9.2 of WiredTiger. +This is version 2.9.3 of WiredTiger. WiredTiger release packages and documentation can be found at: @@ -8,7 +8,7 @@ WiredTiger release packages and documentation can be found at: The documentation for this specific release can be found at: - http://source.wiredtiger.com/2.9.2/index.html + http://source.wiredtiger.com/2.9.3/index.html The WiredTiger source code can be found at: diff --git a/RELEASE_INFO b/RELEASE_INFO index b7145aa2cb3..f18f6f67fc8 100644 --- a/RELEASE_INFO +++ b/RELEASE_INFO @@ -1,6 +1,6 @@ WIREDTIGER_VERSION_MAJOR=2 WIREDTIGER_VERSION_MINOR=9 -WIREDTIGER_VERSION_PATCH=2 +WIREDTIGER_VERSION_PATCH=3 WIREDTIGER_VERSION="$WIREDTIGER_VERSION_MAJOR.$WIREDTIGER_VERSION_MINOR.$WIREDTIGER_VERSION_PATCH" WIREDTIGER_RELEASE_DATE=`date "+%B %e, %Y"` diff --git a/build_posix/aclocal/version-set.m4 b/build_posix/aclocal/version-set.m4 index c677ce41192..bbf8547e548 100644 --- a/build_posix/aclocal/version-set.m4 +++ b/build_posix/aclocal/version-set.m4 @@ -2,8 +2,8 @@ dnl build by dist/s_version VERSION_MAJOR=2 VERSION_MINOR=9 -VERSION_PATCH=2 -VERSION_STRING='"WiredTiger 2.9.2: (December 23, 2016)"' +VERSION_PATCH=3 +VERSION_STRING='"WiredTiger 2.9.3: (May 27, 2017)"' AC_SUBST(VERSION_MAJOR) AC_SUBST(VERSION_MINOR) diff --git a/build_posix/aclocal/version.m4 b/build_posix/aclocal/version.m4 index 29782a22f82..1126d7c147b 100644 --- a/build_posix/aclocal/version.m4 +++ b/build_posix/aclocal/version.m4 @@ -1,2 +1,2 @@ dnl WiredTiger product version for AC_INIT. 
Maintained by dist/s_version -2.9.2 +2.9.3 diff --git a/dist/package/wiredtiger.spec b/dist/package/wiredtiger.spec index aacdf327c98..9d9bdd3949c 100644 --- a/dist/package/wiredtiger.spec +++ b/dist/package/wiredtiger.spec @@ -1,5 +1,5 @@ Name: wiredtiger -Version: 2.9.2 +Version: 2.9.3 Release: 1%{?dist} Summary: WiredTiger data storage engine diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index 53eb287d9a6..8640991e7cd 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -1,5 +1,18 @@ /*! @page upgrading Upgrading WiredTiger applications +@section version_293 Upgrading to Version 2.9.3 +
<dl>
+
+<dt>Logging subsystem statistics</dt>
+<dd>
+Two logging subsystem statistics have been removed as they were a duplicate of
+other statistics. The \c log_slot_joins and \c log_slot_transitions statistics
+are no longer present. They were duplicates of \c log_writes and
+\c log_slot_closes respectively. Several new logging related statistics have
+been added.
+</dd>
+
+</dl><hr>
@section version_292 Upgrading to Version 2.9.2
-- cgit v1.2.1 From 47b0813a658df20305213e639ea0f1abade3bfc2 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Sat, 27 May 2017 04:37:17 +1000 Subject: WT-3342 Fixup release notes to render in doxygen. --- NEWS | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/NEWS b/NEWS index 35c710e2133..380db269523 100644 --- a/NEWS +++ b/NEWS @@ -7,32 +7,32 @@ WiredTiger release 2.9.2, 2017-05-25 See the upgrading documentation for details of API and behavior changes. Significant changes: -SERVER-16796 Increase logging activity for journal recovery operations -SERVER-28168 Cannot start or repair MongoDB after unexpected shutdown. -SERVER-28194 Fix a bug where a missing WiredTiger.turtle file can result in data loss -WT-98 Update the current cursor value without a search -WT-2439 Enhance reconciliation page layout -WT-2898 Improve performance of eviction-heavy workloads by dynamically controlling the number of eviction threads -WT-3097 Race on reconfigure or shutdown can lead to waiting for statistics log server -WT-3106 Add truncate support to command line wt utility -WT-3114 Avoid archiving log files immediately after recovery -WT-3115 Change the dhandle lock to a read/write lock -WT-3127 Fix a bug with CPU yield calls don't necessarily imply memory barriers -WT-3135 Fix a bug with search_near() for index with custom collator -WT-3137 Fix a hang in __log_slot_join/__log_slot_switch_internal -WT-3144 Fix a bug where random cursor returns not-found when descending to an empty page -WT-3148 Improve eviction efficiency with many small trees -WT-3149 Change eviction to start new walks from a random place in the tree -WT-3150 Reduce impact of checkpoints on eviction server -WT-3152 Convert table lock from a spinlock to a read write lock -WT-3157 Fix a bug in checkpoint/transaction integrity issue when writes fail. -WT-3158 Fix structure packing on Windows. 
-WT-3188 Fix error handling in logging where fatal errors could lead to a hang -WT-3193 Close a race between verify opening a handle and eviction visiting it -WT-3206 Fix a bug where core dump happened on NULL page index -WT-3218 Fix a bug that could lead to unexpected checkpoint ordering failures -WT-3243 Reorder log slot release so joins don't wait on IO -WT-3262 Stop making schema operations wait for cache +* SERVER-16796 Increase logging activity for journal recovery operations +* SERVER-28168 Cannot start or repair MongoDB after unexpected shutdown. +* SERVER-28194 Fix a bug where a missing WiredTiger.turtle file can result in data loss +* WT-98 Update the current cursor value without a search +* WT-2439 Enhance reconciliation page layout +* WT-2898 Improve performance of eviction-heavy workloads by dynamically controlling the number of eviction threads +* WT-3097 Race on reconfigure or shutdown can lead to waiting for statistics log server +* WT-3106 Add truncate support to command line wt utility +* WT-3114 Avoid archiving log files immediately after recovery +* WT-3115 Change the dhandle lock to a read/write lock +* WT-3127 Fix a bug with CPU yield calls don't necessarily imply memory barriers +* WT-3135 Fix a bug with search_near() for index with custom collator +* WT-3137 Fix a hang in __log_slot_join/__log_slot_switch_internal +* WT-3144 Fix a bug where random cursor returns not-found when descending to an empty page +* WT-3148 Improve eviction efficiency with many small trees +* WT-3149 Change eviction to start new walks from a random place in the tree +* WT-3150 Reduce impact of checkpoints on eviction server +* WT-3152 Convert table lock from a spinlock to a read write lock +* WT-3157 Fix a bug in checkpoint/transaction integrity issue when writes fail. +* WT-3158 Fix structure packing on Windows. 
+* WT-3188 Fix error handling in logging where fatal errors could lead to a hang +* WT-3193 Close a race between verify opening a handle and eviction visiting it +* WT-3206 Fix a bug where core dump happened on NULL page index +* WT-3218 Fix a bug that could lead to unexpected checkpoint ordering failures +* WT-3243 Reorder log slot release so joins don't wait on IO +* WT-3262 Stop making schema operations wait for cache See JIRA changelog for a full listing: https://jira.mongodb.org/browse/WT/fixforversion/17874 -- cgit v1.2.1 From ce06093958378f549780e423d5b0ee390f922284 Mon Sep 17 00:00:00 2001 From: Sulabh Mahajan Date: Sat, 27 May 2017 06:40:40 +1000 Subject: WT-3248 Mark reconciliation a success if no skipped updates (#3441) Only give up on update/restore if we don't use any updates. Previously, we were tracking the size of saved updates vs the size of uncommitted updates: renamed the variables for clarity. Only give up on update/restore eviction if (1) the page doesn't split and (2) all updates are saved -- i.e., we're not applying anything. --- src/reconcile/rec_write.c | 49 ++++++++++++++++------------------------------- 1 file changed, 17 insertions(+), 32 deletions(-) diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 8f7769766a9..7b1a51da0a0 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -48,8 +48,9 @@ typedef struct { /* Track the page's maximum transaction ID. */ uint64_t max_txn; - uint64_t update_mem; /* Total update memory */ - uint64_t update_mem_skipped; /* Skipped update memory */ + uint64_t update_mem_all; /* Total update memory size */ + uint64_t update_mem_saved; /* Saved update memory size */ + uint64_t update_mem_uncommitted;/* Uncommitted update memory size */ /* * When we can't mark the page clean (for example, checkpoint found some @@ -452,7 +453,7 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, * that's worth trying. 
The lookaside table doesn't help if we skipped * updates, it can only help with older readers preventing eviction. */ - if (lookaside_retryp != NULL && r->update_mem_skipped == 0) + if (lookaside_retryp != NULL && r->update_mem_uncommitted == 0) *lookaside_retryp = true; /* Update statistics. */ @@ -561,9 +562,6 @@ __rec_las_checkpoint_test(WT_SESSION_IMPL *session, WT_RECONCILE *r) static int __rec_write_check_complete(WT_SESSION_IMPL *session, WT_RECONCILE *r) { - WT_BOUNDARY *bnd; - size_t i; - /* * Tests in this function are lookaside tests and tests to decide if * rewriting a page in memory is worth doing. In-memory configurations @@ -582,31 +580,16 @@ __rec_write_check_complete(WT_SESSION_IMPL *session, WT_RECONCILE *r) return (EBUSY); /* - * If doing update/restore based eviction, see if rewriting the page in - * memory is worth the effort. + * If when doing update/restore based eviction, we didn't split and + * didn't apply any updates, then give up. + * + * This may lead to saving the page to the lookaside table: that + * decision is made by eviction. */ - if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE)) { - /* If discarding a disk-page size chunk, do it. */ - for (bnd = r->bnd, i = 0; i < r->bnd_next; ++bnd, ++i) - if (bnd->supd == NULL) - return (0); - - /* - * Switch to the lookaside table if we can: it's more effective - * than rewriting a page in memory because it implies eviction. - */ - if (r->update_mem_skipped == 0) - return (EBUSY); + if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && r->bnd_next == 1 && + r->update_mem_all != 0 && r->update_mem_all == r->update_mem_saved) + return (EBUSY); - /* - * Don't rewrite pages where we're not going to get back enough - * memory to care. There's no empirical evidence the 2KB limit - * is a good configuration, but it should keep us from wasting - * time on tiny pages and pages with only a few updates. 
- */ - if (r->update_mem - r->update_mem_skipped < 2 * WT_KILOBYTE) - return (EBUSY); - } return (0); } @@ -910,7 +893,7 @@ __rec_write_init(WT_SESSION_IMPL *session, r->max_txn = WT_TXN_NONE; /* Track if all updates were skipped. */ - r->update_mem = r->update_mem_skipped = 0; + r->update_mem_all = r->update_mem_saved = r->update_mem_uncommitted = 0; /* Track if the page can be marked clean. */ r->leave_dirty = false; @@ -1230,6 +1213,8 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_ASSERT(session, *updp == NULL || (*updp)->type != WT_UPDATE_RESERVED); + r->update_mem_all += update_mem; + /* * If all of the updates were aborted, quit. This test is not strictly * necessary because the above loop exits with skipped not set and the @@ -1331,9 +1316,9 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, * recover. That test is comparing the memory we'd recover to the memory * we'd have to re-instantiate as part of the rewrite. */ - r->update_mem += update_mem; + r->update_mem_saved += update_mem; if (skipped) - r->update_mem_skipped += update_mem; + r->update_mem_uncommitted += update_mem; append_origv = false; if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE)) { -- cgit v1.2.1 From 1216c4286d1834c5219b651cfd9f4b82d14f45dd Mon Sep 17 00:00:00 2001 From: Sulabh Mahajan Date: Fri, 2 Jun 2017 06:31:02 +1000 Subject: WT-3307 Close btree/dhandle properly when handling __wt_checkpoint_close failure (#3445) * WT-3307 Close btree/dhandle when error handling __wt_checkpoint_close failure * Let EBUSY be returned immediately without further processing dhandles. 
* Minor KNF --- src/conn/conn_dhandle.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 4a653dc4c8f..fa79de0cfbe 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -199,8 +199,13 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) /* Reset the tree's eviction priority (if any). */ __wt_evict_priority_clear(session); } - if (!marked_dead || final) - WT_ERR(__wt_checkpoint_close(session, final)); + if (!marked_dead || final) { + if ((ret = __wt_checkpoint_close( + session, final)) == EBUSY) + WT_ERR(ret); + else + WT_TRET(ret); + } } WT_TRET(__wt_btree_close(session)); -- cgit v1.2.1 From b3ff7c4ab91d3c5fda64381d8ab5957cb697167d Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 1 Jun 2017 21:59:07 -0400 Subject: WT-3348 Lint, Windows warnings. (#3449) * Simplify Windows include files, clean up Windows warnings. * Remove incorrect path from copyright skip list. * Clean up a set of places where an operation could potentially truncate a value by performing the operation at the larger size and then casting the result. 
--- .gitignore | 5 +++++ build_posix/aclocal/strict.m4 | 7 ++++++ dist/s_copyright.list | 1 - src/block/block_compact.c | 6 +++-- src/block/block_ext.c | 2 +- src/evict/evict_lru.c | 2 +- src/include/error.h | 2 +- src/include/os_windows.h | 4 ++-- src/log/log.c | 18 +++++---------- src/reconcile/rec_write.c | 4 ++-- src/support/huffman.c | 2 +- test/format/wts.c | 50 ++++++++++++++++++++++++++++------------- test/utility/test_util.h | 20 ++++++++--------- test/windows/windows_shim.h | 52 +++++++++++++++---------------------------- 14 files changed, 92 insertions(+), 83 deletions(-) diff --git a/.gitignore b/.gitignore index 4611f2aa98c..cc2aad047be 100644 --- a/.gitignore +++ b/.gitignore @@ -62,6 +62,11 @@ tags WT_HOME/ WT_TEST/ +# Bench/workgen +/bench/workgen/_workgen.so +/bench/workgen/workgen/workgen.py +/bench/workgen/workgen_wrap.cxx + # Python /lang/python/_wiredtiger.so /lang/python/wiredtiger.py diff --git a/build_posix/aclocal/strict.m4 b/build_posix/aclocal/strict.m4 index 659867fa69e..8c15a22d575 100644 --- a/build_posix/aclocal/strict.m4 +++ b/build_posix/aclocal/strict.m4 @@ -41,7 +41,14 @@ AC_DEFUN([AM_GCC_WARNINGS], [ w="$w -Wno-error=inline" w="$w -Wno-error=unsafe-loop-optimizations" + # GCC 4.7 + # WiredTiger uses anonymous structures/unions, a C11 extension, + # turn off those warnings. + # GCC 6.X + # Additional warning messages. 
case "$1" in + [*4.7.[0-9]*]) # gcc4.7 + w="$w -Wno-c11-extensions";; [*6.[0-9].[0-9]*]) # gcc6.X w="$w -Wduplicated-cond" w="$w -Wmisleading-indentation";; diff --git a/dist/s_copyright.list b/dist/s_copyright.list index ba5e7c6ff3e..2ac63bcb159 100644 --- a/dist/s_copyright.list +++ b/dist/s_copyright.list @@ -1,6 +1,5 @@ skip api/leveldb/leveldb_wt_config.h skip api/leveldb/leveldb_wt_config.in -skip bench/workgen/workgen.py skip bench/workgen/workgen/workgen.py skip bench/workgen/workgen_wrap.cxx skip build_win/wiredtiger_config.h diff --git a/src/block/block_compact.c b/src/block/block_compact.c index 2ca167f97a4..e7b9beafb01 100644 --- a/src/block/block_compact.c +++ b/src/block/block_compact.c @@ -242,8 +242,10 @@ __block_dump_avail(WT_SESSION_IMPL *session, WT_BLOCK *block, bool start) memset(percentile, 0, sizeof(percentile)); WT_EXT_FOREACH(ext, el->off) for (i = 0; i < ext->size / 512; ++i) { - ++decile[((ext->off + i * 512) * 10) / size]; - ++percentile[((ext->off + i * 512) * 100) / size]; + ++decile[ + ((ext->off + (wt_off_t)i * 512) * 10) / size]; + ++percentile[ + ((ext->off + (wt_off_t)i * 512) * 100) / size]; } #ifdef __VERBOSE_OUTPUT_PERCENTILE diff --git a/src/block/block_ext.c b/src/block/block_ext.c index 0382e6b92aa..6ef861b59c9 100644 --- a/src/block/block_ext.c +++ b/src/block/block_ext.c @@ -1272,7 +1272,7 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session, * entries: the initial WT_BLOCK_EXTLIST_MAGIC/0 pair and the list- * terminating WT_BLOCK_INVALID_OFFSET/0 pair. */ - size = (entries + 2) * 2 * WT_INTPACK64_MAXSIZE; + size = ((size_t)entries + 2) * 2 * WT_INTPACK64_MAXSIZE; WT_RET(__wt_block_write_size(session, block, &size)); WT_RET(__wt_scr_alloc(session, size, &tmp)); dsk = tmp->mem; diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 97b96788831..b5dd3837531 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1706,7 +1706,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, * whether to give up. 
When we are only looking for dirty pages, * search the tree for longer. */ - min_pages = 10 * target_pages; + min_pages = 10 * (uint64_t)target_pages; if (F_ISSET(cache, WT_CACHE_EVICT_DIRTY) && !F_ISSET(cache, WT_CACHE_EVICT_CLEAN)) min_pages *= 10; diff --git a/src/include/error.h b/src/include/error.h index 16f916586cc..465ab4fa859 100644 --- a/src/include/error.h +++ b/src/include/error.h @@ -6,7 +6,7 @@ * See the file LICENSE for redistribution information. */ -#define WT_DEBUG_POINT ((void *)0xdeadbeef) +#define WT_DEBUG_POINT ((void *)(uintptr_t)0xdeadbeef) #define WT_DEBUG_BYTE (0xab) /* In DIAGNOSTIC mode, yield in places where we want to encourage races. */ diff --git a/src/include/os_windows.h b/src/include/os_windows.h index 78a359e65fd..ea54d00af1f 100644 --- a/src/include/os_windows.h +++ b/src/include/os_windows.h @@ -42,9 +42,9 @@ struct timespec { * These are POSIX types which Windows lacks * Eventually WiredTiger will migrate away from these types */ -typedef uint32_t u_int; +typedef unsigned int u_int; typedef unsigned char u_char; -typedef uint64_t u_long; +typedef unsigned long u_long; /* * Windows does have ssize_t diff --git a/src/log/log.c b/src/log/log.c index 0de881660b2..960b87106cc 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -472,14 +472,12 @@ __wt_log_reset(WT_SESSION_IMPL *session, uint32_t lognum) * system. 
*/ WT_RET(__wt_close(session, &log->log_fh)); - WT_RET(__log_get_files(session, - WT_LOG_FILENAME, &logfiles, &logcount)); + WT_RET(__log_get_files(session, WT_LOG_FILENAME, &logfiles, &logcount)); for (i = 0; i < logcount; i++) { WT_ERR(__wt_log_extract_lognum( session, logfiles[i], &old_lognum)); WT_ASSERT(session, old_lognum < lognum); - WT_ERR(__wt_log_remove( - session, WT_LOG_FILENAME, old_lognum)); + WT_ERR(__wt_log_remove(session, WT_LOG_FILENAME, old_lognum)); } log->fileid = lognum; @@ -487,8 +485,7 @@ __wt_log_reset(WT_SESSION_IMPL *session, uint32_t lognum) WT_WITH_SLOT_LOCK(session, log, ret = __log_newfile(session, true, NULL)); WT_ERR(__wt_log_slot_init(session, false)); -err: WT_TRET( - __wt_fs_directory_list_free(session, &logfiles, logcount)); +err: WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount)); return (ret); } @@ -1150,8 +1147,7 @@ __log_truncate(WT_SESSION_IMPL *session, WT_ERR(__log_get_files(session, WT_LOG_FILENAME, &logfiles, &logcount)); for (i = 0; i < logcount; i++) { WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum)); - if (lognum > lsn->l.file && - lognum < log->trunc_lsn.l.file) { + if (lognum > lsn->l.file && lognum < log->trunc_lsn.l.file) { WT_ERR(__log_openfile(session, &log_fh, file_prefix, lognum, 0)); /* @@ -1951,8 +1947,7 @@ advance: /* Truncate if we're in recovery. 
*/ if (LF_ISSET(WT_LOGSCAN_RECOVER) && __wt_log_cmp(&rd_lsn, &log->trunc_lsn) < 0) - WT_ERR(__log_truncate(session, - &rd_lsn, WT_LOG_FILENAME, 0)); + WT_ERR(__log_truncate(session, &rd_lsn, WT_LOG_FILENAME, 0)); err: WT_STAT_CONN_INCR(session, log_scans); /* @@ -2086,8 +2081,7 @@ __wt_log_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, if (compression_failed || result_len / log->allocsize >= record->size / log->allocsize) - WT_STAT_CONN_INCR(session, - log_compress_write_fails); + WT_STAT_CONN_INCR(session, log_compress_write_fails); else { WT_STAT_CONN_INCR(session, log_compress_writes); WT_STAT_CONN_INCRV(session, log_compress_mem, diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 7b1a51da0a0..1e76f0d84d0 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -2149,8 +2149,8 @@ __rec_split_init(WT_SESSION_IMPL *session, r->page_size = r->page_size_orig = max; if (r->raw_compression) r->max_raw_page_size = r->page_size = - (uint32_t)WT_MIN(r->page_size * 10, - WT_MAX(r->page_size, btree->maxmempage / 2)); + (uint32_t)WT_MIN((uint64_t)r->page_size * 10, + WT_MAX((uint64_t)r->page_size, btree->maxmempage / 2)); /* * If we have to split, we want to choose a smaller page size for the * split pages, because otherwise we could end up splitting one large diff --git a/src/support/huffman.c b/src/support/huffman.c index 83d1e790ce7..17342c53ced 100644 --- a/src/support/huffman.c +++ b/src/support/huffman.c @@ -483,7 +483,7 @@ __wt_huffman_open(WT_SESSION_IMPL *session, set_codes(node, huffman->codes, 0, 0); WT_ERR(__wt_calloc_def( - session, 1U << huffman->max_depth, &huffman->code2symbol)); + session, (size_t)1U << huffman->max_depth, &huffman->code2symbol)); make_table(session, huffman->code2symbol, huffman->max_depth, huffman->codes, huffman->numSymbols); diff --git a/test/format/wts.c b/test/format/wts.c index 2a8c6f54b06..673b65794f5 100644 --- a/test/format/wts.c +++ b/test/format/wts.c @@ -35,28 +35,40 @@ 
static const char * compressor(uint32_t compress_flag) { + const char *p; + + p = "unrecognized compressor flag"; switch (compress_flag) { case COMPRESS_NONE: - return ("none"); + p ="none"; + break; case COMPRESS_LZ4: - return ("lz4"); + p ="lz4"; + break; case COMPRESS_LZ4_NO_RAW: - return ("lz4-noraw"); + p ="lz4-noraw"; + break; case COMPRESS_LZO: - return ("LZO1B-6"); + p ="LZO1B-6"; + break; case COMPRESS_SNAPPY: - return ("snappy"); + p ="snappy"; + break; case COMPRESS_ZLIB: - return ("zlib"); + p ="zlib"; + break; case COMPRESS_ZLIB_NO_RAW: - return ("zlib-noraw"); + p ="zlib-noraw"; + break; case COMPRESS_ZSTD: - return ("zstd"); - default: + p ="zstd"; break; + default: + testutil_die(EINVAL, + "illegal compression flag: %#" PRIx32, compress_flag); + /* NOTREACHED */ } - testutil_die(EINVAL, - "illegal compression flag: %#" PRIx32, compress_flag); + return (p); } /* @@ -66,16 +78,22 @@ compressor(uint32_t compress_flag) static const char * encryptor(uint32_t encrypt_flag) { + const char *p; + + p = "unrecognized encryptor flag"; switch (encrypt_flag) { case ENCRYPT_NONE: - return ("none"); + p = "none"; + break; case ENCRYPT_ROTN_7: - return ("rotn,keyid=7"); - default: + p = "rotn,keyid=7"; break; + default: + testutil_die(EINVAL, + "illegal encryption flag: %#" PRIx32, encrypt_flag); + /* NOTREACHED */ } - testutil_die(EINVAL, - "illegal encryption flag: %#" PRIx32, encrypt_flag); + return (p); } static int diff --git a/test/utility/test_util.h b/test/utility/test_util.h index 66746c794e8..9c67bde2457 100644 --- a/test/utility/test_util.h +++ b/test/utility/test_util.h @@ -25,21 +25,21 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include "wt_internal.h" /* For __wt_XXX */ +#include "wt_internal.h" #ifdef _WIN32 - #define DIR_DELIM '\\' - #define DIR_DELIM_STR "\\" - #define DIR_EXISTS_COMMAND "IF EXIST " - #define RM_COMMAND "rd /s /q " +#define DIR_DELIM '\\' +#define DIR_DELIM_STR "\\" +#define DIR_EXISTS_COMMAND "IF EXIST " +#define RM_COMMAND "rd /s /q " #else - #define DIR_DELIM '/' - #define DIR_DELIM_STR "/" - #define RM_COMMAND "rm -rf " +#define DIR_DELIM '/' +#define DIR_DELIM_STR "/" +#define RM_COMMAND "rm -rf " #endif -#define DEFAULT_DIR "WT_TEST" -#define MKDIR_COMMAND "mkdir " +#define DEFAULT_DIR "WT_TEST" +#define MKDIR_COMMAND "mkdir " #ifdef _WIN32 #include "windows_shim.h" diff --git a/test/windows/windows_shim.h b/test/windows/windows_shim.h index d3950ba9a18..88b707f9ad9 100644 --- a/test/windows/windows_shim.h +++ b/test/windows/windows_shim.h @@ -25,27 +25,13 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ - -#ifdef _WIN32 - -#define WIN32_LEAN_AND_MEAN -#include -#include -#include -#include -#include -#include - #include "wt_internal.h" -#define inline __inline - -/* Define some POSIX types */ -typedef int u_int; +#include /* _mkdir */ /* Windows does not define constants for access() */ -#define R_OK 04 -#define X_OK R_OK +#define R_OK 04 +#define X_OK R_OK /* MSVC Doesn't provide __func__, it has __FUNCTION__ */ #ifdef _MSC_VER @@ -77,11 +63,13 @@ int gettimeofday(struct timeval* tp, void* tzp); */ typedef uint32_t useconds_t; -int -sleep(int seconds); +int sleep(int seconds); +int usleep(useconds_t useconds); -int -usleep(useconds_t useconds); +#define lseek(fd, offset, origin) \ + _lseek(fd, (long)(offset), origin) +#define write(fd, buffer, count) \ + _write(fd, buffer, (unsigned int)(count)) /* * Emulate the support we need for tests and example code. 
@@ -102,16 +90,12 @@ typedef HANDLE pthread_t; typedef int pthread_rwlockattr_t; typedef int pthread_attr_t; -int pthread_rwlock_destroy(pthread_rwlock_t *); -int pthread_rwlock_init(pthread_rwlock_t *, - const pthread_rwlockattr_t *); -int pthread_rwlock_rdlock(pthread_rwlock_t *); -int pthread_rwlock_unlock(pthread_rwlock_t *); -int pthread_rwlock_trywrlock(pthread_rwlock_t *); -int pthread_rwlock_wrlock(pthread_rwlock_t *); - -int pthread_create(pthread_t *, const pthread_attr_t *, - void *(*)(void *), void *); -int pthread_join(pthread_t, void **); - -#endif +int pthread_create( + pthread_t *, const pthread_attr_t *, void *(*)(void *), void *); +int pthread_join(pthread_t, void **); +int pthread_rwlock_destroy(pthread_rwlock_t *); +int pthread_rwlock_init(pthread_rwlock_t *, const pthread_rwlockattr_t *); +int pthread_rwlock_rdlock(pthread_rwlock_t *); +int pthread_rwlock_trywrlock(pthread_rwlock_t *); +int pthread_rwlock_unlock(pthread_rwlock_t *); +int pthread_rwlock_wrlock(pthread_rwlock_t *); -- cgit v1.2.1 From 42daa132f21c1391ae2b2b3d789df85878aca471 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Fri, 2 Jun 2017 12:08:34 +1000 Subject: WT-3345 Tune WiredTiger's read/write locks. (#3446) * Add a workload that stresses rwlock performance under various conditions (including `threads >> cores`), tune read and write lock operations to only spin when it is likely to help, and to back off to a condition variable when there is heavy contention. * New rwlock implementation: queue readers and writers separately, don't enforce fairness among readers or if the lock is overwhelmed. * Switch to a spinlock whenever we need to lock a page. Previously we had a read/write lock in the __wt_page structure that was only ever acquired in write mode, plus a spinlock in the page->modify structure. Switch to using the spinlock for everything. 
One slight downside of this change is that we can no longer precisely determine whether a page is locked based on the status of the spinlock (since another page sharing the same lock could be holding it in the places where we used to check). Since that was only ever used for diagnostic / debugging purposes, I think the benefit of the change outweighs this issue. * Fix a bug where a failure during `__wt_curfile_create` caused a data handle to be released twice. This is caught by the sanity checking assertions in the new read/write lock code. * Split may be holding a page lock when restoring updates. Tell the restore code we have the page exclusive and no further locking is required. * Allocate a spinlock for each modified page. Using shared page locks for multiple operations that need to lock a page (including inserts and reconciliation) resulted in self-deadlock when the lookaside table was used. That's because reconciliation held a page lock, then caused inserts to the lookaside table, which acquired the page lock for a page in the lookaside table. With a shared set of page locks, they could both be the same lock. Switch (back?) to allocating a spinlock per modified page. Earlier in this ticket we saved some space in __wt_page, so growing __wt_page_modify is unlikely to be noticeable. * Tweak padding and position of the spinlock in WT_PAGE_MODIFY to claw back some bytes. Move evict_pass_gen to the end of WT_PAGE: on inspection, it should be a cold field relative to the others, which now fit in one x86 cache line.
--- src/async/async_api.c | 2 +- src/btree/bt_compact.c | 4 +- src/btree/bt_cursor.c | 4 +- src/btree/bt_debug.c | 2 - src/btree/bt_discard.c | 2 +- src/btree/bt_handle.c | 2 +- src/btree/bt_read.c | 4 +- src/btree/bt_split.c | 22 ++- src/btree/col_modify.c | 9 +- src/btree/row_modify.c | 16 +- src/conn/conn_cache.c | 2 +- src/conn/conn_cache_pool.c | 4 +- src/conn/conn_ckpt.c | 2 +- src/conn/conn_dhandle.c | 2 +- src/conn/conn_handle.c | 15 +- src/conn/conn_log.c | 12 +- src/conn/conn_stat.c | 2 +- src/conn/conn_sweep.c | 2 +- src/cursor/cur_file.c | 9 +- src/include/btmem.h | 27 ++- src/include/connection.h | 15 -- src/include/extern.h | 7 +- src/include/extern_posix.h | 2 +- src/include/extern_win.h | 2 +- src/include/mutex.h | 31 ++-- src/include/serial.i | 30 ++-- src/include/session.h | 4 + src/include/verify_build.h | 1 - src/include/wt_internal.h | 4 +- src/lsm/lsm_manager.c | 2 +- src/lsm/lsm_tree.c | 2 +- src/os_posix/os_mtx_cond.c | 16 +- src/os_win/os_mtx_cond.c | 6 +- src/reconcile/rec_write.c | 6 +- src/session/session_dhandle.c | 4 +- src/support/mtx_rw.c | 405 ++++++++++++++++++++++++------------------ src/support/thread_group.c | 10 +- src/txn/txn.c | 8 +- test/csuite/Makefile.am | 3 + test/csuite/rwlock/main.c | 184 +++++++++++++++++++ 40 files changed, 567 insertions(+), 319 deletions(-) create mode 100644 test/csuite/rwlock/main.c diff --git a/src/async/async_api.c b/src/async/async_api.c index 1e4bfd51c46..0f3e376fbfd 100644 --- a/src/async/async_api.c +++ b/src/async/async_api.c @@ -435,7 +435,7 @@ __wt_async_destroy(WT_SESSION_IMPL *session) F_CLR(conn, WT_CONN_SERVER_ASYNC); for (i = 0; i < conn->async_workers; i++) WT_TRET(__wt_thread_join(session, async->worker_tids[i])); - WT_TRET(__wt_cond_destroy(session, &async->flush_cond)); + __wt_cond_destroy(session, &async->flush_cond); /* Close the server threads' sessions. 
*/ for (i = 0; i < conn->async_workers; i++) diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c index 17308d02d91..c6a412aa84e 100644 --- a/src/btree/bt_compact.c +++ b/src/btree/bt_compact.c @@ -60,7 +60,7 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) */ if (mod->rec_result == WT_PM_REC_REPLACE || mod->rec_result == WT_PM_REC_MULTIBLOCK) - __wt_writelock(session, &page->page_lock); + WT_PAGE_LOCK(session, page); if (mod->rec_result == WT_PM_REC_REPLACE) ret = bm->compact_page_skip(bm, session, @@ -80,7 +80,7 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) if (mod->rec_result == WT_PM_REC_REPLACE || mod->rec_result == WT_PM_REC_MULTIBLOCK) - __wt_writeunlock(session, &page->page_lock); + WT_PAGE_UNLOCK(session, page); return (ret); } diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 7e415150cc5..52435eeefed 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -346,7 +346,7 @@ __cursor_col_modify( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type) { return (__wt_col_modify(session, cbt, - cbt->iface.recno, &cbt->iface.value, NULL, modify_type)); + cbt->iface.recno, &cbt->iface.value, NULL, modify_type, false)); } /* @@ -358,7 +358,7 @@ __cursor_row_modify( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type) { return (__wt_row_modify(session, cbt, - &cbt->iface.key, &cbt->iface.value, NULL, modify_type)); + &cbt->iface.key, &cbt->iface.value, NULL, modify_type, false)); } /* diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index c3f98a98ec5..394ac6c7b84 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -689,8 +689,6 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref) WT_RET(ds->f(ds, ", entries %" PRIu32, entries)); WT_RET(ds->f(ds, ", %s", __wt_page_is_modified(page) ? "dirty" : "clean")); - WT_RET(ds->f(ds, ", %s", __wt_rwlock_islocked( - session, &page->page_lock) ? 
"locked" : "unlocked")); if (F_ISSET_ATOMIC(page, WT_PAGE_BUILD_KEYS)) WT_RET(ds->f(ds, ", keys-built")); diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index a04face8f64..bfa8eb25aac 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -98,7 +98,6 @@ __page_out_int(WT_SESSION_IMPL *session, WT_PAGE **pagep, bool rewrite) */ WT_ASSERT(session, !__wt_page_is_modified(page)); WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU)); - WT_ASSERT(session, !__wt_rwlock_islocked(session, &page->page_lock)); /* * If a root page split, there may be one or more pages linked from the @@ -254,6 +253,7 @@ __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page) __wt_ovfl_discard_free(session, page); __wt_free(session, page->modify->ovfl_track); + __wt_spin_destroy(session, &page->modify->page_lock); __wt_free(session, page->modify); } diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index e4780f1bf42..06fbd6b74c7 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -442,7 +442,7 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) } /* Initialize locks. */ - __wt_rwlock_init(session, &btree->ovfl_lock); + WT_RET(__wt_rwlock_init(session, &btree->ovfl_lock)); WT_RET(__wt_spin_init(session, &btree->flush_lock, "btree flush")); btree->modified = false; /* Clean */ diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index e6a0f53ab40..de84a711019 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -91,7 +91,7 @@ __col_instantiate(WT_SESSION_IMPL *session, /* Search the page and add updates. */ WT_RET(__wt_col_search(session, recno, ref, cbt)); WT_RET(__wt_col_modify( - session, cbt, recno, NULL, upd, WT_UPDATE_STANDARD)); + session, cbt, recno, NULL, upd, WT_UPDATE_STANDARD, false)); return (0); } @@ -106,7 +106,7 @@ __row_instantiate(WT_SESSION_IMPL *session, /* Search the page and add updates. 
*/ WT_RET(__wt_row_search(session, key, ref, cbt, true)); WT_RET(__wt_row_modify( - session, cbt, key, NULL, upd, WT_UPDATE_STANDARD)); + session, cbt, key, NULL, upd, WT_UPDATE_STANDARD, false)); return (0); } diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index c2c56a18131..71346baee2e 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -1166,13 +1166,19 @@ __split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock, for (;;) { parent = ref->home; + /* + * The page will be marked dirty, and we can only lock a page + * with a modify structure. + */ + WT_RET(__wt_page_modify_init(session, parent)); + if (trylock) - WT_RET(__wt_try_writelock(session, &parent->page_lock)); + WT_RET(WT_PAGE_TRYLOCK(session, parent)); else - __wt_writelock(session, &parent->page_lock); + WT_PAGE_LOCK(session, parent); if (parent == ref->home) break; - __wt_writeunlock(session, &parent->page_lock); + WT_PAGE_UNLOCK(session, parent); } /* @@ -1195,7 +1201,7 @@ __split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock, *parentp = parent; return (0); -err: __wt_writeunlock(session, &parent->page_lock); +err: WT_PAGE_UNLOCK(session, parent); return (ret); } @@ -1211,7 +1217,7 @@ __split_internal_unlock(WT_SESSION_IMPL *session, WT_PAGE *parent, bool hazard) if (hazard) ret = __wt_hazard_clear(session, parent->pg_intl_parent_ref); - __wt_writeunlock(session, &parent->page_lock); + WT_PAGE_UNLOCK(session, parent); return (ret); } @@ -1425,7 +1431,7 @@ __split_multi_inmem( /* Apply the modification. */ WT_ERR(__wt_col_modify(session, - &cbt, recno, NULL, upd, WT_UPDATE_STANDARD)); + &cbt, recno, NULL, upd, WT_UPDATE_STANDARD, true)); break; case WT_PAGE_ROW_LEAF: /* Build a key. */ @@ -1446,8 +1452,8 @@ __split_multi_inmem( WT_ERR(__wt_row_search(session, key, ref, &cbt, true)); /* Apply the modification. 
*/ - WT_ERR(__wt_row_modify( - session, &cbt, key, NULL, upd, WT_UPDATE_STANDARD)); + WT_ERR(__wt_row_modify(session, &cbt, + key, NULL, upd, WT_UPDATE_STANDARD, true)); break; WT_ILLEGAL_VALUE_ERR(session); } diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c index c256f03a612..2a64ec03952 100644 --- a/src/btree/col_modify.c +++ b/src/btree/col_modify.c @@ -17,7 +17,8 @@ static int __col_insert_alloc( */ int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, - uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type) + uint64_t recno, const WT_ITEM *value, + WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) { static const WT_ITEM col_fix_remove = { "", 1, NULL, 0, 0 }; WT_BTREE *btree; @@ -106,7 +107,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, /* Serialize the update. */ WT_ERR(__wt_update_serial( - session, page, &cbt->ins->upd, &upd, upd_size)); + session, page, &cbt->ins->upd, &upd, upd_size, false)); } else { /* Allocate the append/update list reference as necessary. */ if (append) { @@ -188,11 +189,11 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, if (append) WT_ERR(__wt_col_append_serial( session, page, cbt->ins_head, cbt->ins_stack, - &ins, ins_size, &cbt->recno, skipdepth)); + &ins, ins_size, &cbt->recno, skipdepth, exclusive)); else WT_ERR(__wt_insert_serial( session, page, cbt->ins_head, cbt->ins_stack, - &ins, ins_size, skipdepth)); + &ins, ins_size, skipdepth, exclusive)); } /* If the update was successful, add it to the in-memory log. 
*/ diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c index 2bf3c2f29bc..c7afdcfcb31 100644 --- a/src/btree/row_modify.c +++ b/src/btree/row_modify.c @@ -15,18 +15,12 @@ int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) { - WT_CONNECTION_IMPL *conn; WT_PAGE_MODIFY *modify; - conn = S2C(session); - WT_RET(__wt_calloc_one(session, &modify)); - /* - * Select a spinlock for the page; let the barrier immediately below - * keep things from racing too badly. - */ - modify->page_lock = ++conn->page_lock_cnt % WT_PAGE_LOCKS; + /* Initialize the spinlock for the page. */ + WT_RET(__wt_spin_init(session, &modify->page_lock, "btree page")); /* * Multiple threads of control may be searching and deciding to modify @@ -48,7 +42,7 @@ __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, - WT_UPDATE *upd_arg, u_int modify_type) + WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) { WT_DECL_RET; WT_INSERT *ins; @@ -129,7 +123,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, /* Serialize the update. */ WT_ERR(__wt_update_serial( - session, page, upd_entry, &upd, upd_size)); + session, page, upd_entry, &upd, upd_size, exclusive)); } else { /* * Allocate the insert array as necessary. @@ -204,7 +198,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, /* Insert the WT_INSERT structure. 
*/ WT_ERR(__wt_insert_serial( session, page, cbt->ins_head, cbt->ins_stack, - &ins, ins_size, skipdepth)); + &ins, ins_size, skipdepth, exclusive)); } if (logged && modify_type != WT_UPDATE_RESERVED) diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c index ad83f0b2b4a..5515eb026ca 100644 --- a/src/conn/conn_cache.c +++ b/src/conn/conn_cache.c @@ -312,7 +312,7 @@ __wt_cache_destroy(WT_SESSION_IMPL *session) cache->bytes_dirty_intl + cache->bytes_dirty_leaf, cache->pages_dirty_intl + cache->pages_dirty_leaf); - WT_TRET(__wt_cond_destroy(session, &cache->evict_cond)); + __wt_cond_destroy(session, &cache->evict_cond); __wt_spin_destroy(session, &cache->evict_pass_lock); __wt_spin_destroy(session, &cache->evict_queue_lock); __wt_spin_destroy(session, &cache->evict_walk_lock); diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c index c1c9c98b30c..adc2e2bffc3 100644 --- a/src/conn/conn_cache_pool.c +++ b/src/conn/conn_cache_pool.c @@ -225,7 +225,7 @@ err: __wt_spin_unlock(session, &__wt_process.spinlock); __wt_free(session, pool_name); if (ret != 0 && created) { __wt_free(session, cp->name); - WT_TRET(__wt_cond_destroy(session, &cp->cache_pool_cond)); + __wt_cond_destroy(session, &cp->cache_pool_cond); __wt_free(session, cp); } return (ret); @@ -391,7 +391,7 @@ __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session) __wt_free(session, cp->name); __wt_spin_destroy(session, &cp->cache_pool_lock); - WT_TRET(__wt_cond_destroy(session, &cp->cache_pool_cond)); + __wt_cond_destroy(session, &cp->cache_pool_cond); __wt_free(session, cp); } diff --git a/src/conn/conn_ckpt.c b/src/conn/conn_ckpt.c index 43673cd335e..a47524af2d7 100644 --- a/src/conn/conn_ckpt.c +++ b/src/conn/conn_ckpt.c @@ -231,7 +231,7 @@ __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) WT_TRET(__wt_thread_join(session, conn->ckpt_tid)); conn->ckpt_tid_set = false; } - WT_TRET(__wt_cond_destroy(session, &conn->ckpt_cond)); + __wt_cond_destroy(session, &conn->ckpt_cond); /* Close the 
server thread's session. */ if (conn->ckpt_session != NULL) { diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index fa79de0cfbe..d4670562eb8 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -52,7 +52,7 @@ __wt_conn_dhandle_alloc( WT_RET(__wt_calloc_one(session, &dhandle)); - __wt_rwlock_init(session, &dhandle->rwlock); + WT_ERR(__wt_rwlock_init(session, &dhandle->rwlock)); dhandle->name_hash = __wt_hash_city64(uri, strlen(uri)); WT_ERR(__wt_strdup(session, uri, &dhandle->name)); WT_ERR(__wt_strdup(session, checkpoint, &dhandle->checkpoint)); diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index 11b5368e9ad..32a0d80c1f3 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -62,14 +62,9 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_RET(__wt_spin_init(session, &conn->turtle_lock, "turtle file")); /* Read-write locks */ - __wt_rwlock_init(session, &conn->dhandle_lock); - __wt_rwlock_init(session, &conn->hot_backup_lock); - __wt_rwlock_init(session, &conn->table_lock); - - WT_RET(__wt_calloc_def(session, WT_PAGE_LOCKS, &conn->page_lock)); - for (i = 0; i < WT_PAGE_LOCKS; ++i) - WT_RET( - __wt_spin_init(session, &conn->page_lock[i], "btree page")); + WT_RET(__wt_rwlock_init(session, &conn->dhandle_lock)); + WT_RET(__wt_rwlock_init(session, &conn->hot_backup_lock)); + WT_RET(__wt_rwlock_init(session, &conn->table_lock)); /* Setup the spin locks for the LSM manager queues. */ WT_RET(__wt_spin_init(session, @@ -106,7 +101,6 @@ void __wt_connection_destroy(WT_CONNECTION_IMPL *conn) { WT_SESSION_IMPL *session; - u_int i; /* Check there's something to destroy. 
*/ if (conn == NULL) @@ -137,9 +131,6 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_spin_destroy(session, &conn->schema_lock); __wt_rwlock_destroy(session, &conn->table_lock); __wt_spin_destroy(session, &conn->turtle_lock); - for (i = 0; i < WT_PAGE_LOCKS; ++i) - __wt_spin_destroy(session, &conn->page_lock[i]); - __wt_free(session, conn->page_lock); /* Free allocated memory. */ __wt_free(session, conn->cfg); diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index dac16cc9d00..37acbe4a1a4 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -879,7 +879,7 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_RET(__wt_spin_init(session, &log->log_sync_lock, "log sync")); WT_RET(__wt_spin_init(session, &log->log_writelsn_lock, "log write LSN")); - __wt_rwlock_init(session, &log->log_archive_lock); + WT_RET(__wt_rwlock_init(session, &log->log_archive_lock)); if (FLD_ISSET(conn->direct_io, WT_DIRECT_IO_LOG)) log->allocsize = (uint32_t) WT_MAX(conn->buffer_alignment, WT_LOG_ALIGN); @@ -1042,12 +1042,12 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) } /* Destroy the condition variables now that all threads are stopped */ - WT_TRET(__wt_cond_destroy(session, &conn->log_cond)); - WT_TRET(__wt_cond_destroy(session, &conn->log_file_cond)); - WT_TRET(__wt_cond_destroy(session, &conn->log_wrlsn_cond)); + __wt_cond_destroy(session, &conn->log_cond); + __wt_cond_destroy(session, &conn->log_file_cond); + __wt_cond_destroy(session, &conn->log_wrlsn_cond); - WT_TRET(__wt_cond_destroy(session, &conn->log->log_sync_cond)); - WT_TRET(__wt_cond_destroy(session, &conn->log->log_write_cond)); + __wt_cond_destroy(session, &conn->log->log_sync_cond); + __wt_cond_destroy(session, &conn->log->log_write_cond); __wt_rwlock_destroy(session, &conn->log->log_archive_lock); __wt_spin_destroy(session, &conn->log->log_lock); __wt_spin_destroy(session, &conn->log->log_slot_lock); diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c index 
2554083b26c..f38d81a7f7a 100644 --- a/src/conn/conn_stat.c +++ b/src/conn/conn_stat.c @@ -648,7 +648,7 @@ __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close) WT_TRET(__wt_thread_join(session, conn->stat_tid)); conn->stat_tid_set = false; } - WT_TRET(__wt_cond_destroy(session, &conn->stat_cond)); + __wt_cond_destroy(session, &conn->stat_cond); /* Log a set of statistics on shutdown if configured. */ if (is_close) diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index fbedb938bd8..df60a3c784d 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -429,7 +429,7 @@ __wt_sweep_destroy(WT_SESSION_IMPL *session) WT_TRET(__wt_thread_join(session, conn->sweep_tid)); conn->sweep_tid_set = 0; } - WT_TRET(__wt_cond_destroy(session, &conn->sweep_cond)); + __wt_cond_destroy(session, &conn->sweep_cond); if (conn->sweep_session != NULL) { wt_session = &conn->sweep_session->iface; diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index 4469cac685d..3b6328a2d93 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -520,7 +520,14 @@ __curfile_create(WT_SESSION_IMPL *session, WT_STAT_DATA_INCR(session, cursor_create); if (0) { -err: WT_TRET(__curfile_close(cursor)); +err: /* + * Our caller expects to release the data handle if we fail. + * Disconnect it from the cursor before closing. 
+ */ + if (session->dhandle != NULL) + __wt_cursor_dhandle_decr_use(session); + cbt->btree = NULL; + WT_TRET(__curfile_close(cursor)); *cursorp = NULL; } diff --git a/src/include/btmem.h b/src/include/btmem.h index 4e8d3c05d7d..32839192a96 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -414,18 +414,20 @@ struct __wt_page_modify { size_t discard_allocated; } *ovfl_track; +#define WT_PAGE_LOCK(s, p) \ + __wt_spin_lock((s), &(p)->modify->page_lock) +#define WT_PAGE_TRYLOCK(s, p) \ + __wt_spin_trylock((s), &(p)->modify->page_lock) +#define WT_PAGE_UNLOCK(s, p) \ + __wt_spin_unlock((s), &(p)->modify->page_lock) + WT_SPINLOCK page_lock; /* Page's spinlock */ + /* * The write generation is incremented when a page is modified, a page * is clean if the write generation is 0. */ uint32_t write_gen; -#define WT_PAGE_LOCK(s, p) \ - __wt_spin_lock((s), &S2C(s)->page_lock[(p)->modify->page_lock]) -#define WT_PAGE_UNLOCK(s, p) \ - __wt_spin_unlock((s), &S2C(s)->page_lock[(p)->modify->page_lock]) - uint8_t page_lock; /* Page's spinlock */ - #define WT_PM_REC_EMPTY 1 /* Reconciliation: no replacement */ #define WT_PM_REC_MULTIBLOCK 2 /* Reconciliation: multiple blocks */ #define WT_PM_REC_REPLACE 3 /* Reconciliation: single block */ @@ -603,13 +605,6 @@ struct __wt_page { uint8_t unused[2]; /* Unused padding */ - /* - * Used to protect and co-ordinate splits for internal pages and - * reconciliation for all pages. Only used to co-ordinate among the - * uncommon cases that require exclusive access to a page. - */ - WT_RWLOCK page_lock; - /* * The page's read generation acts as an LRU value for each page in the * tree; it is used by the eviction server thread to select pages to be @@ -636,8 +631,6 @@ struct __wt_page { #define WT_READGEN_STEP 100 uint64_t read_gen; - uint64_t evict_pass_gen; /* Eviction pass generation */ - size_t memory_footprint; /* Memory attached to the page */ /* Page's on-disk representation: NULL for pages created in memory. 
*/ @@ -645,6 +638,10 @@ struct __wt_page { /* If/when the page is modified, we need lots more information. */ WT_PAGE_MODIFY *modify; + + /* This is the 64 byte boundary, try to keep hot fields above here. */ + + uint64_t evict_pass_gen; /* Eviction pass generation */ }; /* diff --git a/src/include/connection.h b/src/include/connection.h index 6f656270f38..bf2f8a2c7e1 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -175,21 +175,6 @@ struct __wt_connection_impl { WT_SPINLOCK turtle_lock; /* Turtle file spinlock */ WT_RWLOCK dhandle_lock; /* Data handle list lock */ - /* - * We distribute the btree page locks across a set of spin locks. Don't - * use too many: they are only held for very short operations, each one - * is 64 bytes, so 256 will fill the L1 cache on most CPUs. - * - * Use a prime number of buckets rather than assuming a good hash - * (Reference Sedgewick, Algorithms in C, "Hash Functions"). - * - * Note: this can't be an array, we impose cache-line alignment and gcc - * doesn't support that for arrays smaller than the alignment. 
- */ -#define WT_PAGE_LOCKS 17 - WT_SPINLOCK *page_lock; /* Btree page spinlocks */ - u_int page_lock_cnt; /* Next spinlock to use */ - /* Connection queue */ TAILQ_ENTRY(__wt_connection_impl) q; /* Cache pool queue */ diff --git a/src/include/extern.h b/src/include/extern.h index 01c21b188c0..f055e4810b3 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -180,7 +180,7 @@ extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *b extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tree_walk_skip( WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_leaf_key_copy( WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -189,7 +189,7 @@ extern int __wt_row_ikey_alloc(WT_SESSION_IMPL *session, uint32_t cell_offset, c extern int __wt_row_ikey_incr(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t cell_offset, const void *key, size_t size, WT_REF 
*ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_ikey(WT_SESSION_IMPL *session, uint32_t cell_offset, const void *key, size_t size, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_insert_alloc(WT_SESSION_IMPL *session, const WT_ITEM *key, u_int skipdepth, WT_INSERT **insp, size_t *ins_sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_UPDATE **updp, size_t *sizep, u_int modify_type) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd); @@ -687,10 +687,9 @@ extern void __wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg); extern void __wt_print_huffman_code(void *huffman_arg, uint16_t symbol); extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern void __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l); +extern int __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void 
__wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK *l); extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern void __wt_readlock_spin(WT_SESSION_IMPL *session, WT_RWLOCK *l); extern void __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l); extern void __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l); extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/include/extern_posix.h b/src/include/extern_posix.h index 3afffef687b..c0ed056c7b6 100644 --- a/src/include/extern_posix.h +++ b/src/include/extern_posix.h @@ -15,7 +15,7 @@ extern int __wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *ma extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled); extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); -extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern bool __wt_absolute_path(const char *path); diff --git a/src/include/extern_win.h b/src/include/extern_win.h index 4e232a2df80..d548ee0b2ec 100644 --- a/src/include/extern_win.h +++ b/src/include/extern_win.h @@ -13,7 +13,7 @@ extern int __wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, v extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern 
void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled); extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); -extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern bool __wt_absolute_path(const char *path); diff --git a/src/include/mutex.h b/src/include/mutex.h index 00babd47fbf..5f814c2799e 100644 --- a/src/include/mutex.h +++ b/src/include/mutex.h @@ -37,17 +37,21 @@ struct __wt_condvar { * Don't modify this structure without understanding the read/write locking * functions. */ -union __wt_rwlock { /* Read/write lock */ - uint64_t u; - struct { - uint32_t wr; /* Writers and readers */ - } i; - struct { - uint16_t writers; /* Now serving for writers */ - uint16_t readers; /* Now serving for readers */ - uint16_t next; /* Next available ticket number */ - uint16_t writers_active;/* Count of active writers */ - } s; +struct __wt_rwlock { /* Read/write lock */ + volatile union { + uint64_t v; /* Full 64-bit value */ + struct { + uint8_t current; /* Current ticket */ + uint8_t next; /* Next available ticket */ + uint8_t reader; /* Read queue ticket */ + uint8_t __notused; /* Padding */ + uint16_t readers_active;/* Count of active readers */ + uint16_t readers_queued;/* Count of queued readers */ + } s; + } u; + + WT_CONDVAR *cond_readers; /* Blocking readers */ + WT_CONDVAR *cond_writers; /* Blocking writers */ }; /* @@ -63,8 +67,8 @@ union __wt_rwlock { /* Read/write lock */ #define SPINLOCK_PTHREAD_MUTEX_ADAPTIVE 3 struct __wt_spinlock { - WT_CACHE_LINE_PAD_BEGIN #if SPINLOCK_TYPE == SPINLOCK_GCC + WT_CACHE_LINE_PAD_BEGIN 
volatile int lock; #elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX || \ SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE || \ @@ -87,5 +91,8 @@ struct __wt_spinlock { int16_t stat_int_usecs_off; /* waiting server threads offset */ int8_t initialized; /* Lock initialized, for cleanup */ + +#if SPINLOCK_TYPE == SPINLOCK_GCC WT_CACHE_LINE_PAD_END +#endif }; diff --git a/src/include/serial.i b/src/include/serial.i index 18ff0bb7ec2..bd0e498f621 100644 --- a/src/include/serial.i +++ b/src/include/serial.i @@ -154,7 +154,7 @@ __col_append_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head, static inline int __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, - size_t new_ins_size, uint64_t *recnop, u_int skipdepth) + size_t new_ins_size, uint64_t *recnop, u_int skipdepth, bool exclusive) { WT_INSERT *new_ins = *new_insp; WT_DECL_RET; @@ -165,11 +165,16 @@ __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page, /* Clear references to memory we now own and must free on error. */ *new_insp = NULL; - /* Acquire the page's spinlock, call the worker function. */ - WT_PAGE_LOCK(session, page); + /* + * Acquire the page's spinlock unless we already have exclusive access. + * Then call the worker function. + */ + if (!exclusive) + WT_PAGE_LOCK(session, page); ret = __col_append_serial_func( session, ins_head, ins_stack, new_ins, recnop, skipdepth); - WT_PAGE_UNLOCK(session, page); + if (!exclusive) + WT_PAGE_UNLOCK(session, page); if (ret != 0) { /* Free unused memory on error. 
*/ @@ -198,7 +203,7 @@ __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page, static inline int __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page, WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp, - size_t new_ins_size, u_int skipdepth) + size_t new_ins_size, u_int skipdepth, bool exclusive) { WT_INSERT *new_ins = *new_insp; WT_DECL_RET; @@ -220,10 +225,12 @@ __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page, ret = __insert_simple_func( session, ins_stack, new_ins, skipdepth); else { - WT_PAGE_LOCK(session, page); + if (!exclusive) + WT_PAGE_LOCK(session, page); ret = __insert_serial_func( session, ins_head, ins_stack, new_ins, skipdepth); - WT_PAGE_UNLOCK(session, page); + if (!exclusive) + WT_PAGE_UNLOCK(session, page); } if (ret != 0) { @@ -252,7 +259,8 @@ __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page, */ static inline int __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, - WT_UPDATE **srch_upd, WT_UPDATE **updp, size_t upd_size) + WT_UPDATE **srch_upd, WT_UPDATE **updp, size_t upd_size, + bool exclusive) { WT_DECL_RET; WT_UPDATE *obsolete, *upd = *updp; @@ -295,7 +303,7 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, /* * If there are no subsequent WT_UPDATE structures we are done here. */ - if (upd->next == NULL) + if (upd->next == NULL || exclusive) return (0); /* @@ -316,11 +324,11 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, } /* If we can't lock it, don't scan, that's okay. 
*/ - if (__wt_try_writelock(session, &page->page_lock) != 0) + if (WT_PAGE_TRYLOCK(session, page) != 0) return (0); obsolete = __wt_update_obsolete_check(session, page, upd->next); - __wt_writeunlock(session, &page->page_lock); + WT_PAGE_UNLOCK(session, page); if (obsolete != NULL) __wt_update_obsolete_free(session, page, obsolete); diff --git a/src/include/session.h b/src/include/session.h index 543063f5a90..dfd84675721 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -98,6 +98,10 @@ struct __wt_session_impl { */ TAILQ_HEAD(__tables, __wt_table) tables; + /* Current rwlock for callback. */ + WT_RWLOCK *current_rwlock; + uint8_t current_rwticket; + WT_ITEM **scratch; /* Temporary memory for any function */ u_int scratch_alloc; /* Currently allocated */ size_t scratch_cached; /* Scratch bytes cached */ diff --git a/src/include/verify_build.h b/src/include/verify_build.h index e93f5931c21..57189b5c2b2 100644 --- a/src/include/verify_build.h +++ b/src/include/verify_build.h @@ -60,7 +60,6 @@ __wt_verify_build(void) sizeof(s) > WT_CACHE_LINE_ALIGNMENT || \ sizeof(s) % WT_CACHE_LINE_ALIGNMENT == 0) WT_PADDING_CHECK(WT_LOGSLOT); - WT_PADDING_CHECK(WT_SPINLOCK); WT_PADDING_CHECK(WT_TXN_STATE); /* diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h index e250cfc33ba..1c9600dd27f 100644 --- a/src/include/wt_internal.h +++ b/src/include/wt_internal.h @@ -268,6 +268,8 @@ struct __wt_ref; typedef struct __wt_ref WT_REF; struct __wt_row; typedef struct __wt_row WT_ROW; +struct __wt_rwlock; + typedef struct __wt_rwlock WT_RWLOCK; struct __wt_salvage_cookie; typedef struct __wt_salvage_cookie WT_SALVAGE_COOKIE; struct __wt_save_upd; @@ -304,8 +306,6 @@ union __wt_lsn; typedef union __wt_lsn WT_LSN; union __wt_rand_state; typedef union __wt_rand_state WT_RAND_STATE; -union __wt_rwlock; - typedef union __wt_rwlock WT_RWLOCK; /* * Forward type declarations for internal types: END * DO NOT EDIT: automatically built by dist/s_typedef. 
diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index f391c553d2a..b1f775a275e 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -330,7 +330,7 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) __wt_spin_destroy(session, &manager->switch_lock); __wt_spin_destroy(session, &manager->app_lock); __wt_spin_destroy(session, &manager->manager_lock); - WT_TRET(__wt_cond_destroy(session, &manager->work_cond)); + __wt_cond_destroy(session, &manager->work_cond); return (ret); } diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index fe36237969f..9932ba6b5b3 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -472,7 +472,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session, /* Try to open the tree. */ WT_RET(__wt_calloc_one(session, &lsm_tree)); - __wt_rwlock_init(session, &lsm_tree->rwlock); + WT_RET(__wt_rwlock_init(session, &lsm_tree->rwlock)); WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri)); diff --git a/src/os_posix/os_mtx_cond.c b/src/os_posix/os_mtx_cond.c index 10606e8108e..1018bf860d6 100644 --- a/src/os_posix/os_mtx_cond.c +++ b/src/os_posix/os_mtx_cond.c @@ -153,7 +153,7 @@ err: * __wt_cond_destroy -- * Destroy a condition variable. 
*/ -int +void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) { WT_CONDVAR *cond; @@ -161,11 +161,15 @@ __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) cond = *condp; if (cond == NULL) - return (0); + return; - ret = pthread_cond_destroy(&cond->cond); - WT_TRET(pthread_mutex_destroy(&cond->mtx)); - __wt_free(session, *condp); + if ((ret = pthread_cond_destroy(&cond->cond)) != 0) + WT_PANIC_MSG( + session, ret, "pthread_cond_destroy: %s", cond->name); - return (ret); + if ((ret = pthread_mutex_destroy(&cond->mtx)) != 0) + WT_PANIC_MSG( + session, ret, "pthread_mutex_destroy: %s", cond->name); + + __wt_free(session, *condp); } diff --git a/src/os_win/os_mtx_cond.c b/src/os_win/os_mtx_cond.c index 2002d1e925c..9d4339c8731 100644 --- a/src/os_win/os_mtx_cond.c +++ b/src/os_win/os_mtx_cond.c @@ -163,18 +163,16 @@ __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) * __wt_cond_destroy -- * Destroy a condition variable. */ -int +void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) { WT_CONDVAR *cond; cond = *condp; if (cond == NULL) - return (0); + return; /* Do nothing to delete Condition Variable */ DeleteCriticalSection(&cond->mtx); __wt_free(session, *condp); - - return (0); } diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 1e76f0d84d0..8bff4c630c0 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -386,7 +386,7 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, * In-memory splits: reconciliation of an internal page cannot handle * a child page splitting during the reconciliation. */ - __wt_writelock(session, &page->page_lock); + WT_PAGE_LOCK(session, page); oldest_id = __wt_txn_oldest_id(session); if (LF_ISSET(WT_EVICTING)) @@ -405,7 +405,7 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, /* Initialize the reconciliation structure for each new run. 
*/ if ((ret = __rec_write_init( session, ref, flags, salvage, &session->reconcile)) != 0) { - __wt_writeunlock(session, &page->page_lock); + WT_PAGE_UNLOCK(session, page); return (ret); } r = session->reconcile; @@ -446,7 +446,7 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_TRET(__rec_write_wrapup_err(session, r, page)); /* Release the reconciliation lock. */ - __wt_writeunlock(session, &page->page_lock); + WT_PAGE_UNLOCK(session, page); /* * If our caller can configure lookaside table reconciliation, flag if diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c index 2d0a2eeb2dc..4565ae71896 100644 --- a/src/session/session_dhandle.c +++ b/src/session/session_dhandle.c @@ -261,8 +261,8 @@ __wt_session_release_btree(WT_SESSION_IMPL *session) * can get a handle without special flags. */ if (F_ISSET(dhandle, WT_DHANDLE_DISCARD | WT_DHANDLE_DISCARD_FORCE)) { - __session_find_dhandle(session, - dhandle->name, dhandle->checkpoint, &dhandle_cache); + WT_SAVE_DHANDLE(session, __session_find_dhandle(session, + dhandle->name, dhandle->checkpoint, &dhandle_cache)); if (dhandle_cache != NULL) __session_discard_dhandle(session, dhandle_cache); } diff --git a/src/support/mtx_rw.c b/src/support/mtx_rw.c index 0126e77e9b8..d86d75a5340 100644 --- a/src/support/mtx_rw.c +++ b/src/support/mtx_rw.c @@ -27,7 +27,7 @@ */ /* - * Based on "Spinlocks and Read-Write Locks" by Dr. Steven Fuerst: + * Inspired by "Spinlocks and Read-Write Locks" by Dr. Steven Fuerst: * http://locklessinc.com/articles/locks/ * * Dr. Fuerst further credits: @@ -39,77 +39,46 @@ * by John Mellor-Crummey and Michael Scott in their landmark paper "Scalable * Reader-Writer Synchronization for Shared-Memory Multiprocessors". * - * The following is an explanation of this code. First, the underlying lock - * structure. + * The following is an explanation of our interpretation and implementation. + * First, the underlying lock structure. 
* + * volatile union { + * uint64_t v; // Full 64-bit value * struct { - * uint16_t writers; Now serving for writers - * uint16_t readers; Now serving for readers - * uint16_t next; Next available ticket number - * uint16_t __notused; Padding - * } + * uint8_t current; // Current ticket + * uint8_t next; // Next available ticket + * uint8_t reader; // Read queue ticket + * uint8_t __notused; // Padding + * uint16_t readers_active; // Count of active readers + * uint16_t readers_queued; // Count of queued readers + * } s; + * } u; * * First, imagine a store's 'take a number' ticket algorithm. A customer takes * a unique ticket number and customers are served in ticket order. In the data - * structure, 'writers' is the next writer to be served, 'readers' is the next - * reader to be served, and 'next' is the next available ticket number. + * structure, 'next' is the ticket that will be allocated next, and 'current' + * is the ticket being served. * - * Next, consider exclusive (write) locks. The 'now serving' number for writers - * is 'writers'. To lock, 'take a number' and wait until that number is being - * served; more specifically, atomically copy and increment the current value of - * 'next', and then wait until 'writers' equals that copied number. + * Next, consider exclusive (write) locks. To lock, 'take a number' and wait + * until that number is being served; more specifically, atomically increment + * 'next', and then wait until 'current' equals that allocated ticket. * - * Shared (read) locks are similar. Like writers, readers atomically get the - * next number available. However, instead of waiting for 'writers' to equal - * their number, they wait for 'readers' to equal their number. + * Shared (read) locks are similar, except that readers can share a ticket + * (both with each other and with a single writer). Readers with a given + * ticket execute before the writer with that ticket. 
In other words, writers + * wait for both their ticket to become current and for all readers to exit + * the lock. * - * This has the effect of queuing lock requests in the order they arrive - * (incidentally avoiding starvation). + * If there are no active writers (indicated by 'current' == 'next'), readers + * can immediately enter the lock by atomically incrementing 'readers_active'. + * When there are writers active, readers form a new queue by first setting + * 'reader' to 'next' (i.e. readers are scheduled after any queued writers, + * avoiding starvation), then atomically incrementing 'readers_queued'. * - * Each lock/unlock pair requires incrementing both 'readers' and 'writers'. - * In the case of a reader, the 'readers' increment happens when the reader - * acquires the lock (to allow read-lock sharing), and the 'writers' increment - * happens when the reader releases the lock. In the case of a writer, both - * 'readers' and 'writers' are incremented when the writer releases the lock. - * - * For example, consider the following read (R) and write (W) lock requests: - * - * writers readers next - * 0 0 0 - * R: ticket 0, readers match OK 0 1 1 - * R: ticket 1, readers match OK 0 2 2 - * R: ticket 2, readers match OK 0 3 3 - * W: ticket 3, writers no match block 0 3 4 - * R: ticket 2, unlock 1 3 4 - * R: ticket 0, unlock 2 3 4 - * R: ticket 1, unlock 3 3 4 - * W: ticket 3, writers match OK 3 3 4 - * - * Note the writer blocks until 'writers' equals its ticket number and it does - * not matter if readers unlock in order or not. - * - * Readers or writers entering the system after the write lock is queued block, - * and the next ticket holder (reader or writer) will unblock when the writer - * unlocks. 
An example, continuing from the last line of the above example: - * - * writers readers next - * W: ticket 3, writers match OK 3 3 4 - * R: ticket 4, readers no match block 3 3 5 - * R: ticket 5, readers no match block 3 3 6 - * W: ticket 6, writers no match block 3 3 7 - * W: ticket 3, unlock 4 4 7 - * R: ticket 4, readers match OK 4 5 7 - * R: ticket 5, readers match OK 4 6 7 - * - * The 'next' field is a 2-byte value so the available ticket number wraps at - * 64K requests. If a thread's lock request is not granted until the 'next' - * field cycles and the same ticket is taken by another thread, we could grant - * a lock to two separate threads at the same time, and bad things happen: two - * writer threads or a reader thread and a writer thread would run in parallel, - * and lock waiters could be skipped if the unlocks race. This is unlikely, it - * only happens if a lock request is blocked by 64K other requests. The fix is - * to grow the lock structure fields, but the largest atomic instruction we have - * is 8 bytes, the structure has no room to grow. + * The 'next' field is a 1-byte value so the available ticket number wraps + * after 256 requests. If a thread's write lock request would cause the 'next' + * field to catch up with 'current', instead it waits to avoid the same ticket + * being allocated to multiple threads. */ #include "wt_internal.h" @@ -118,12 +87,14 @@ * __wt_rwlock_init -- * Initialize a read/write lock. 
*/ -void +int __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l) { - WT_UNUSED(session); + l->u.v = 0; - l->u = 0; + WT_RET(__wt_cond_alloc(session, "rwlock wait", &l->cond_readers)); + WT_RET(__wt_cond_alloc(session, "rwlock wait", &l->cond_writers)); + return (0); } /* @@ -133,9 +104,10 @@ __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l) void __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK *l) { - WT_UNUSED(session); + l->u.v = 0; - l->u = 0; + __wt_cond_destroy(session, &l->cond_readers); + __wt_cond_destroy(session, &l->cond_writers); } /* @@ -149,46 +121,35 @@ __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_STAT_CONN_INCR(session, rwlock_read); - new = old = *l; + new.u.v = old.u.v = l->u.v; /* - * This read lock can only be granted if the lock was last granted to - * a reader and there are no readers or writers blocked on the lock, - * that is, if this thread's ticket would be the next ticket granted. - * Do the cheap test to see if this can possibly succeed (and confirm - * the lock is in the correct state to grant this read lock). + * This read lock can only be granted if there are no active writers. + * + * Also check for overflow in case there are 64K active readers. */ - if (old.s.readers != old.s.next) + if (old.u.s.current != old.u.s.next || + new.u.s.readers_active == UINT16_MAX) return (EBUSY); /* - * The replacement lock value is a result of allocating a new ticket and - * incrementing the reader value to match it. + * The replacement lock value is a result of adding an active reader. + * + * We rely on this atomic operation to provide a barrier. */ - new.s.readers = new.s.next = old.s.next + 1; - return (__wt_atomic_cas64(&l->u, old.u, new.u) ? 0 : EBUSY); + new.u.s.readers_active++; + return (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v) ? 0 : EBUSY); } /* - * __wt_readlock_spin -- - * Spin to get a read lock: only yield the CPU if the lock is held - * exclusive. 
+ * __read_blocked -- + * Check whether the current read lock request should keep waiting. */ -void -__wt_readlock_spin(WT_SESSION_IMPL *session, WT_RWLOCK *l) +static bool +__read_blocked(WT_SESSION_IMPL *session) { - /* - * Try to get the lock in a single operation if it is available to - * readers. This avoids the situation where multiple readers arrive - * concurrently and have to line up in order to enter the lock. For - * read-heavy workloads it can make a significant difference. - */ - while (__wt_try_readlock(session, l) != 0) { - if (l->s.writers_active > 0) - __wt_yield(); - else - WT_PAUSE(); - } + return (session->current_rwticket != + session->current_rwlock->u.s.current); } /* @@ -198,41 +159,90 @@ __wt_readlock_spin(WT_SESSION_IMPL *session, WT_RWLOCK *l) void __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) { - uint16_t ticket; + WT_RWLOCK new, old; int pause_cnt; + int16_t writers_active; + uint8_t ticket; WT_STAT_CONN_INCR(session, rwlock_read); WT_DIAGNOSTIC_YIELD; - /* - * Possibly wrap: if we have more than 64K lockers waiting, the ticket - * value will wrap and two lockers will simultaneously be granted the - * lock. - */ - ticket = __wt_atomic_fetch_add16(&l->s.next, 1); - for (pause_cnt = 0; ticket != l->s.readers;) { + for (;;) { /* - * We failed to get the lock; pause before retrying and if we've - * paused enough, yield so we don't burn CPU to no purpose. This - * situation happens if there are more threads than cores in the - * system and we're thrashing on shared resources. + * Fast path: if there is no active writer, join the current + * group. */ - if (++pause_cnt < WT_THOUSAND) + for (old.u.v = l->u.v; + old.u.s.current == old.u.s.next; + old.u.v = l->u.v) { + new.u.v = old.u.v; + /* + * Check for overflow: if the maximum number of readers + * are already active, wait to try again. 
+ */ + if (++new.u.s.readers_active == 0) + goto stall; + if (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v)) + return; WT_PAUSE(); - else + } + + /* + * There is an active writer: join the next group. + * + * Limit how many readers can queue: don't allow more readers + * to queue than there are active writers (calculated as + * `next - current`): otherwise, in write-heavy workloads, + * readers can keep queuing up in front of writers and + * throughput is unstable. + * + * If the maximum number of readers are already queued, wait + * until we can get a valid ticket. + */ + writers_active = old.u.s.next - old.u.s.current; + if (old.u.s.readers_queued > writers_active) { +stall: __wt_cond_wait( + session, l->cond_readers, WT_THOUSAND, NULL); + continue; + } + + /* + * If we are the first reader to queue, set the next read + * group. Note: don't re-read from the lock or we could race + * with a writer unlocking. + */ + new.u.v = old.u.v; + if (new.u.s.readers_queued++ == 0) + new.u.s.reader = new.u.s.next; + ticket = new.u.s.reader; + + if (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v)) + break; + } + + /* Wait for our group to start. */ + for (pause_cnt = 0; ticket != l->u.s.current; pause_cnt++) { + if (pause_cnt < 1000) + WT_PAUSE(); + else if (pause_cnt < 1200) __wt_yield(); + else { + session->current_rwlock = l; + session->current_rwticket = ticket; + __wt_cond_wait( + session, l->cond_readers, 0, __read_blocked); + } } - /* - * We're the only writer of the readers field, so the update does not - * need to be atomic. - */ - ++l->s.readers; + WT_ASSERT(session, l->u.s.readers_active > 0); /* * Applications depend on a barrier here so that operations holding the - * lock see consistent data. + * lock see consistent data. The atomic operation above isn't + * sufficient here because we don't own the lock until our ticket comes + * up and whatever data we are protecting may have changed in the + * meantime. 
*/ WT_READ_BARRIER(); } @@ -244,13 +254,22 @@ __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) void __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) { - WT_UNUSED(session); + WT_RWLOCK new, old; - /* - * Increment the writers value (other readers are doing the same, make - * sure we don't race). - */ - (void)__wt_atomic_add16(&l->s.writers, 1); + do { + old.u.v = l->u.v; + WT_ASSERT(session, old.u.s.readers_active > 0); + + /* + * Decrement the active reader count (other readers are doing + * the same, make sure we don't race). + */ + new.u.v = old.u.v; + --new.u.s.readers_active; + } while (!__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v)); + + if (new.u.s.readers_active == 0 && new.u.s.current != new.u.s.next) + __wt_cond_signal(session, l->cond_writers); } /* @@ -264,22 +283,44 @@ __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_STAT_CONN_INCR(session, rwlock_write); - old = new = *l; - /* - * This write lock can only be granted if the lock was last granted to - * a writer and there are no readers or writers blocked on the lock, - * that is, if this thread's ticket would be the next ticket granted. - * Do the cheap test to see if this can possibly succeed (and confirm - * the lock is in the correct state to grant this write lock). + * This write lock can only be granted if no readers or writers blocked + * on the lock, that is, if this thread's ticket would be the next + * ticket granted. Check if this can possibly succeed (and confirm the + * lock is in the correct state to grant this write lock). */ - if (old.s.writers != old.s.next) + old.u.v = l->u.v; + if (old.u.s.current != old.u.s.next || old.u.s.readers_active != 0) return (EBUSY); - /* The replacement lock value is a result of allocating a new ticket. */ - ++new.s.next; - ++new.s.writers_active; - return (__wt_atomic_cas64(&l->u, old.u, new.u) ? 
0 : EBUSY); + /* + * We've checked above that there is no writer active (since + * `current == next`), so there should be no readers queued. + */ + WT_ASSERT(session, old.u.s.readers_queued == 0); + + /* + * The replacement lock value is a result of allocating a new ticket. + * + * We rely on this atomic operation to provide a barrier. + */ + new.u.v = old.u.v; + new.u.s.next++; + return (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v) ? 0 : EBUSY); +} + +/* + * __write_blocked -- + * Check whether the current write lock request should keep waiting. + */ +static bool +__write_blocked(WT_SESSION_IMPL *session) +{ + WT_RWLOCK *l; + + l = session->current_rwlock; + return (session->current_rwticket != l->u.s.current || + l->u.s.readers_active != 0); } /* @@ -289,34 +330,51 @@ __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) void __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) { - uint16_t ticket; + WT_RWLOCK new, old; int pause_cnt; + uint8_t ticket; WT_STAT_CONN_INCR(session, rwlock_write); - /* - * Possibly wrap: if we have more than 64K lockers waiting, the ticket - * value will wrap and two lockers will simultaneously be granted the - * lock. - */ - ticket = __wt_atomic_fetch_add16(&l->s.next, 1); - (void)__wt_atomic_add16(&l->s.writers_active, 1); - for (pause_cnt = 0; ticket != l->s.writers;) { + for (;;) { + new.u.v = old.u.v = l->u.v; + ticket = new.u.s.next++; + /* - * We failed to get the lock; pause before retrying and if we've - * paused enough, sleep so we don't burn CPU to no purpose. This - * situation happens if there are more threads than cores in the - * system and we're thrashing on shared resources. + * Avoid wrapping: if we allocate more than 256 tickets, two + * lockers will simultaneously be granted the lock. 
*/ - if (++pause_cnt < WT_THOUSAND) + if (new.u.s.current == new.u.s.next) { + __wt_cond_wait( + session, l->cond_writers, WT_THOUSAND, NULL); + continue; + } + if (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v)) + break; + } + + /* Wait for our group to start and any readers to drain. */ + for (pause_cnt = 0; + ticket != l->u.s.current || l->u.s.readers_active != 0; + pause_cnt++) { + if (pause_cnt < 1000) WT_PAUSE(); - else - __wt_sleep(0, 10); + else if (pause_cnt < 1200) + __wt_yield(); + else { + session->current_rwlock = l; + session->current_rwticket = ticket; + __wt_cond_wait( + session, l->cond_writers, 0, __write_blocked); + } } /* * Applications depend on a barrier here so that operations holding the - * lock see consistent data. + * lock see consistent data. The atomic operation above isn't + * sufficient here because we don't own the lock until our ticket comes + * up and whatever data we are protecting may have changed in the + * meantime. */ WT_READ_BARRIER(); } @@ -328,29 +386,34 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) void __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) { - WT_RWLOCK new; - - WT_UNUSED(session); + WT_RWLOCK new, old; - (void)__wt_atomic_sub16(&l->s.writers_active, 1); + do { + new.u.v = old.u.v = l->u.v; - /* - * Ensure that all updates made while the lock was held are visible to - * the next thread to acquire the lock. - */ - WT_WRITE_BARRIER(); + /* + * We're holding the lock exclusive, there shouldn't be any + * active readers. + */ + WT_ASSERT(session, old.u.s.readers_active == 0); - new = *l; + /* + * Allow the next batch to start. + * + * If there are readers in the next group, swap queued readers + * to active: this could race with new readlock requests, so we + * have to spin. 
+ */ + if (++new.u.s.current == new.u.s.reader) { + new.u.s.readers_active = new.u.s.readers_queued; + new.u.s.readers_queued = 0; + } + } while (!__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v)); - /* - * We're the only writer of the writers/readers fields, so the update - * does not need to be atomic; we have to update both values at the - * same time though, otherwise we'd potentially race with the thread - * next granted the lock. - */ - ++new.s.writers; - ++new.s.readers; - l->i.wr = new.i.wr; + if (new.u.s.readers_active != 0) + __wt_cond_signal(session, l->cond_readers); + else if (new.u.s.current != new.u.s.next) + __wt_cond_signal(session, l->cond_writers); WT_DIAGNOSTIC_YIELD; } @@ -365,6 +428,6 @@ __wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *l) { WT_UNUSED(session); - return (l->s.writers != l->s.next || l->s.readers != l->s.next); + return (l->u.s.current != l->u.s.next || l->u.s.readers_active != 0); } #endif diff --git a/src/support/thread_group.c b/src/support/thread_group.c index 5abc3d28cc0..59caaedf5cf 100644 --- a/src/support/thread_group.c +++ b/src/support/thread_group.c @@ -104,7 +104,7 @@ __thread_group_shrink( if (thread == NULL) continue; WT_TRET(__wt_thread_join(session, thread->tid)); - WT_TRET(__wt_cond_destroy(session, &thread->pause_cond)); + __wt_cond_destroy(session, &thread->pause_cond); } __wt_writelock(session, &group->lock); for (current_slot = group->alloc; current_slot > new_count; ) { @@ -234,7 +234,7 @@ err: /* wt_session = (WT_SESSION *)thread->session; WT_TRET(wt_session->close(wt_session, NULL)); } - WT_TRET(__wt_cond_destroy(session, &thread->pause_cond)); + __wt_cond_destroy(session, &thread->pause_cond); __wt_free(session, thread); } @@ -290,7 +290,7 @@ __wt_thread_group_create( __wt_verbose(session, WT_VERB_THREAD_GROUP, "Creating thread group: %p", (void *)group); - __wt_rwlock_init(session, &group->lock); + WT_RET(__wt_rwlock_init(session, &group->lock)); WT_ERR(__wt_cond_alloc( session, "thread group 
cond", &group->wait_cond)); cond_alloced = true; @@ -307,7 +307,7 @@ __wt_thread_group_create( /* Cleanup on error to avoid leaking resources */ err: if (ret != 0) { if (cond_alloced) - WT_TRET(__wt_cond_destroy(session, &group->wait_cond)); + __wt_cond_destroy(session, &group->wait_cond); __wt_rwlock_destroy(session, &group->lock); } return (ret); @@ -332,7 +332,7 @@ __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) __wt_free(session, group->threads); - WT_TRET(__wt_cond_destroy(session, &group->wait_cond)); + __wt_cond_destroy(session, &group->wait_cond); __wt_rwlock_destroy(session, &group->lock); /* diff --git a/src/txn/txn.c b/src/txn/txn.c index d9edbb80564..fb77ab4e860 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -126,7 +126,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session) n = 0; /* We're going to scan the table: wait for the lock. */ - __wt_readlock_spin(session, &txn_global->scan_rwlock); + __wt_readlock(session, &txn_global->scan_rwlock); current_id = pinned_id = txn_global->current; prev_oldest_id = txn_global->oldest_id; @@ -293,7 +293,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) /* First do a read-only scan. */ if (wait) - __wt_readlock_spin(session, &txn_global->scan_rwlock); + __wt_readlock(session, &txn_global->scan_rwlock); else if ((ret = __wt_try_readlock(session, &txn_global->scan_rwlock)) != 0) return (ret == EBUSY ? 
0 : ret); @@ -782,8 +782,8 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]) WT_RET(__wt_spin_init(session, &txn_global->id_lock, "transaction id lock")); - __wt_rwlock_init(session, &txn_global->scan_rwlock); - __wt_rwlock_init(session, &txn_global->nsnap_rwlock); + WT_RET(__wt_rwlock_init(session, &txn_global->scan_rwlock)); + WT_RET(__wt_rwlock_init(session, &txn_global->nsnap_rwlock)); txn_global->nsnap_oldest_id = WT_TXN_NONE; TAILQ_INIT(&txn_global->nsnaph); diff --git a/test/csuite/Makefile.am b/test/csuite/Makefile.am index 10ab890f2f5..f2b4fcacdc8 100644 --- a/test/csuite/Makefile.am +++ b/test/csuite/Makefile.am @@ -57,6 +57,9 @@ noinst_PROGRAMS += test_wt3135_search_near_collator test_wt3184_dup_index_collator_SOURCES = wt3184_dup_index_collator/main.c noinst_PROGRAMS += test_wt3184_dup_index_collator +test_rwlock_SOURCES = rwlock/main.c +noinst_PROGRAMS += test_rwlock + # Run this during a "make check" smoke test. TESTS = $(noinst_PROGRAMS) LOG_COMPILER = $(TEST_WRAPPER) diff --git a/test/csuite/rwlock/main.c b/test/csuite/rwlock/main.c new file mode 100644 index 00000000000..04813182478 --- /dev/null +++ b/test/csuite/rwlock/main.c @@ -0,0 +1,184 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. 
We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "test_util.h" + +/* + * JIRA ticket reference: HELP-4355 + * Test rwlock collapse under load. + */ +#define MAX_THREADS 1000 +#define READS_PER_WRITE 10000 +//#define READS_PER_WRITE 1000000 +//#define READS_PER_WRITE 100 + +#define CHECK_CORRECTNESS 1 +//#define USE_POSIX 1 + +static WT_RWLOCK rwlock; +static pthread_rwlock_t p_rwlock; +static bool running; +static uint64_t shared_counter; + +void *thread_rwlock(void *); +void *thread_dump(void *); + +int +main(int argc, char *argv[]) +{ + TEST_OPTS *opts, _opts; + struct timespec te, ts; + pthread_t dump_id, id[MAX_THREADS]; + int i; + + if (!testutil_enable_long_tests()) /* Ignore unless requested */ + return (EXIT_SUCCESS); + + opts = &_opts; + memset(opts, 0, sizeof(*opts)); + opts->nthreads = 100; + opts->nops = 1000000; /* per thread */ + testutil_check(testutil_parse_opts(argc, argv, opts)); + running = true; + + testutil_make_work_dir(opts->home); + testutil_check(wiredtiger_open(opts->home, NULL, + "create,session_max=1000,statistics=(fast)", &opts->conn)); + + testutil_check(__wt_rwlock_init(NULL, &rwlock)); + testutil_check(pthread_rwlock_init(&p_rwlock, NULL)); + + testutil_check(pthread_create( + &dump_id, NULL, thread_dump, (void *)opts)); + + __wt_epoch(NULL, &ts); + for (i = 0; i < (int)opts->nthreads; ++i) + testutil_check(pthread_create( + &id[i], NULL, 
thread_rwlock, (void *)opts)); + + while (--i >= 0) + testutil_check(pthread_join(id[i], NULL)); + __wt_epoch(NULL, &te); + printf("%.2lf\n", WT_TIMEDIFF_MS(te, ts) / 1000.0); + + running = false; + testutil_check(pthread_join(dump_id, NULL)); + + testutil_check(pthread_rwlock_destroy(&p_rwlock)); + testutil_cleanup(opts); + return (EXIT_SUCCESS); +} + +/* + * Acquire a rwlock, every Nth operation, acquire exclusive. + */ +void * +thread_rwlock(void *arg) +{ + TEST_OPTS *opts; + WT_SESSION *wt_session; + WT_SESSION_IMPL *session; + uint64_t i, counter; + bool writelock; + + opts = (TEST_OPTS *)arg; + testutil_check( + opts->conn->open_session(opts->conn, NULL, NULL, &wt_session)); + session = (WT_SESSION_IMPL *)wt_session; + + printf("Running rwlock thread\n"); + for (i = 1; i <= opts->nops; ++i) { + writelock = (i % READS_PER_WRITE == 0); + +#ifdef USE_POSIX + if (writelock) + testutil_check(pthread_rwlock_wrlock(&p_rwlock)); + else + testutil_check(pthread_rwlock_rdlock(&p_rwlock)); +#else + if (writelock) + __wt_writelock(session, &rwlock); + else + __wt_readlock(session, &rwlock); +#endif + + /* + * Do a tiny amount of work inside the lock so the compiler + * can't optimize everything away. + */ + (void)__wt_atomic_add64(&counter, 1); + +#ifdef CHECK_CORRECTNESS + if (writelock) + counter = ++shared_counter; + else + counter = shared_counter; + + __wt_yield(); + + testutil_assert(counter == shared_counter); +#endif + +#ifdef USE_POSIX + testutil_check(pthread_rwlock_unlock(&p_rwlock)); +#else + if (writelock) + __wt_writeunlock(session, &rwlock); + else + __wt_readunlock(session, &rwlock); +#endif + + if (i % 10000 == 0) { + printf("%s", session->id == 20 ? 
".\n" : "."); + fflush(stdout); + } + } + + opts->running = false; + + return (NULL); +} + +void * +thread_dump(void *arg) { + WT_UNUSED(arg); + + while (running) { + sleep(1); + printf("\n" + "rwlock { current %" PRIu8 ", next %" PRIu8 + ", reader %" PRIu8 ", readers_active %" PRIu16 + ", readers_queued %" PRIu16 " }\n", + rwlock.u.s.current, + rwlock.u.s.next, + rwlock.u.s.reader, + rwlock.u.s.readers_active, + rwlock.u.s.readers_queued); + } + + return (NULL); +} -- cgit v1.2.1 From 9fb0f938d51681447695ca338f554802875fc316 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Fri, 2 Jun 2017 21:40:59 +1000 Subject: WT-3354 Fix bugs found by Coverity. (#3451) * WT-3354 Fix bugs found by Coverity. * two cases where error checking for rwlocks should goto the error label for cleanup. * LSM code not restoring isolation if a checkpoint fails part way through * Take care with ordering an assertion after a read barrier. We just had an assertion failure on PPC, and from inspection it looks like read in the assertion could be scheduled before read that sees the ticket allocated. We have a read barrier in this path to protect against exactly that kind of thing happening to application data, move the assertion after it so our diagnostics are also safe. --- src/btree/row_modify.c | 7 ++++--- src/lsm/lsm_tree.c | 2 +- src/lsm/lsm_work_unit.c | 3 ++- src/support/mtx_rw.c | 4 ++-- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c index c7afdcfcb31..cab07341a1c 100644 --- a/src/btree/row_modify.c +++ b/src/btree/row_modify.c @@ -15,12 +15,13 @@ int __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) { + WT_DECL_RET; WT_PAGE_MODIFY *modify; WT_RET(__wt_calloc_one(session, &modify)); /* Initialize the spinlock for the page. 
*/ - WT_RET(__wt_spin_init(session, &modify->page_lock, "btree page")); + WT_ERR(__wt_spin_init(session, &modify->page_lock, "btree page")); /* * Multiple threads of control may be searching and deciding to modify @@ -31,8 +32,8 @@ __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page) if (__wt_atomic_cas_ptr(&page->modify, NULL, modify)) __wt_cache_page_inmem_incr(session, page, sizeof(*modify)); else - __wt_free(session, modify); - return (0); +err: __wt_free(session, modify); + return (ret); } /* diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 9932ba6b5b3..62ec44764e7 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -472,7 +472,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session, /* Try to open the tree. */ WT_RET(__wt_calloc_one(session, &lsm_tree)); - WT_RET(__wt_rwlock_init(session, &lsm_tree->rwlock)); + WT_ERR(__wt_rwlock_init(session, &lsm_tree->rwlock)); WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri)); diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index 1b789b87d2a..ec55de31e0d 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -328,8 +328,9 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, */ saved_isolation = session->txn.isolation; session->txn.isolation = WT_ISO_READ_UNCOMMITTED; - WT_ERR(__wt_cache_op(session, WT_SYNC_WRITE_LEAVES)); + ret = __wt_cache_op(session, WT_SYNC_WRITE_LEAVES); session->txn.isolation = saved_isolation; + WT_ERR(ret); __wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointing %s", chunk->uri); diff --git a/src/support/mtx_rw.c b/src/support/mtx_rw.c index d86d75a5340..6e8ca78cfbd 100644 --- a/src/support/mtx_rw.c +++ b/src/support/mtx_rw.c @@ -235,8 +235,6 @@ stall: __wt_cond_wait( } } - WT_ASSERT(session, l->u.s.readers_active > 0); - /* * Applications depend on a barrier here so that operations holding the * lock see consistent data. The atomic operation above isn't @@ -245,6 +243,8 @@ stall: __wt_cond_wait( * meantime. 
*/ WT_READ_BARRIER(); + + WT_ASSERT(session, l->u.s.readers_active > 0); } /* -- cgit v1.2.1 From d4986a2b851a7e507c3748d660845cc32b21e3c1 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Fri, 2 Jun 2017 16:12:00 -0400 Subject: WT-3351 Fix assertion condition. (#3452) --- src/log/log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/log/log.c b/src/log/log.c index 960b87106cc..868f5d0eaf4 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -476,7 +476,7 @@ __wt_log_reset(WT_SESSION_IMPL *session, uint32_t lognum) for (i = 0; i < logcount; i++) { WT_ERR(__wt_log_extract_lognum( session, logfiles[i], &old_lognum)); - WT_ASSERT(session, old_lognum < lognum); + WT_ASSERT(session, old_lognum < lognum || lognum == 1); WT_ERR(__wt_log_remove(session, WT_LOG_FILENAME, old_lognum)); } log->fileid = lognum; -- cgit v1.2.1 From ad515323be891ce52216f410fb1d2d8a45cc3cbe Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Tue, 6 Jun 2017 17:07:21 +1000 Subject: WT-3356 Use atomic reads of rwlocks. (#3454) * WT-3356 Use atomic reads of rwlocks. Previously we had some conditions that checked several fields within a rwlock by indirecting to the live structure. Switch to always doing a read of the full 64-bit value, then using local reads from the copy. Otherwise, we're relying on the compiler and the memory model to order the structure accesses in "code execution order". That could explain assertion failures and/or incorrect behavior with the new rwlock implementation. * Change all waits to 10ms. Previously when stalling waiting to get into the lock we would wait for 1ms, but once queued we waited forever. The former is probably too aggressive (burns too much CPU when we should be able to wait for a notification), and the latter is dangerous if a notification is ever lost (a thread with a ticket may never wake up). 
--- src/support/mtx_rw.c | 81 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 30 deletions(-) diff --git a/src/support/mtx_rw.c b/src/support/mtx_rw.c index 6e8ca78cfbd..2354ad4f4cc 100644 --- a/src/support/mtx_rw.c +++ b/src/support/mtx_rw.c @@ -121,23 +121,22 @@ __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_STAT_CONN_INCR(session, rwlock_read); - new.u.v = old.u.v = l->u.v; + old.u.v = l->u.v; - /* - * This read lock can only be granted if there are no active writers. - * - * Also check for overflow in case there are 64K active readers. - */ - if (old.u.s.current != old.u.s.next || - new.u.s.readers_active == UINT16_MAX) + /* This read lock can only be granted if there are no active writers. */ + if (old.u.s.current != old.u.s.next) return (EBUSY); /* * The replacement lock value is a result of adding an active reader. - * - * We rely on this atomic operation to provide a barrier. + * Check for overflow: if the maximum number of readers are already + * active, no new readers can enter the lock. */ - new.u.s.readers_active++; + new.u.v = old.u.v; + if (++new.u.s.readers_active == 0) + return (EBUSY); + + /* We rely on this atomic operation to provide a barrier. */ return (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v) ? 0 : EBUSY); } @@ -179,7 +178,8 @@ __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) new.u.v = old.u.v; /* * Check for overflow: if the maximum number of readers - * are already active, wait to try again. + * are already active, no new readers can enter the + * lock. 
*/ if (++new.u.s.readers_active == 0) goto stall; @@ -202,8 +202,8 @@ __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) */ writers_active = old.u.s.next - old.u.s.current; if (old.u.s.readers_queued > writers_active) { -stall: __wt_cond_wait( - session, l->cond_readers, WT_THOUSAND, NULL); +stall: __wt_cond_wait(session, + l->cond_readers, 10 * WT_THOUSAND, NULL); continue; } @@ -230,8 +230,8 @@ stall: __wt_cond_wait( else { session->current_rwlock = l; session->current_rwticket = ticket; - __wt_cond_wait( - session, l->cond_readers, 0, __read_blocked); + __wt_cond_wait(session, + l->cond_readers, 10 * WT_THOUSAND, __read_blocked); } } @@ -244,7 +244,9 @@ stall: __wt_cond_wait( */ WT_READ_BARRIER(); - WT_ASSERT(session, l->u.s.readers_active > 0); + /* Sanity check that we (still) have the lock. */ + WT_ASSERT(session, + ticket == l->u.s.current && l->u.s.readers_active > 0); } /* @@ -337,26 +339,37 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_STAT_CONN_INCR(session, rwlock_write); for (;;) { - new.u.v = old.u.v = l->u.v; + old.u.v = l->u.v; + + /* Allocate a ticket. */ + new.u.v = old.u.v; ticket = new.u.s.next++; /* - * Avoid wrapping: if we allocate more than 256 tickets, two - * lockers will simultaneously be granted the lock. + * Check for overflow: if the next ticket is allowed to catch + * up with the current batch, two writers could be granted the + * lock simultaneously. */ if (new.u.s.current == new.u.s.next) { - __wt_cond_wait( - session, l->cond_writers, WT_THOUSAND, NULL); + __wt_cond_wait(session, + l->cond_writers, 10 * WT_THOUSAND, NULL); continue; } if (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v)) break; } - /* Wait for our group to start and any readers to drain. */ - for (pause_cnt = 0; - ticket != l->u.s.current || l->u.s.readers_active != 0; - pause_cnt++) { + /* + * Wait for our group to start and any readers to drain. + * + * We take care here to do an atomic read of the full 64-bit lock + * value. 
Otherwise, reads are not guaranteed to be ordered and we + * could see no readers active from a different batch and decide that + * we have the lock. + */ + for (pause_cnt = 0, old.u.v = l->u.v; + ticket != old.u.s.current || old.u.s.readers_active != 0; + pause_cnt++, old.u.v = l->u.v) { if (pause_cnt < 1000) WT_PAUSE(); else if (pause_cnt < 1200) @@ -364,8 +377,8 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) else { session->current_rwlock = l; session->current_rwticket = ticket; - __wt_cond_wait( - session, l->cond_writers, 0, __write_blocked); + __wt_cond_wait(session, + l->cond_writers, 10 * WT_THOUSAND, __write_blocked); } } @@ -377,6 +390,10 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) * meantime. */ WT_READ_BARRIER(); + + /* Sanity check that we (still) have the lock. */ + WT_ASSERT(session, + ticket == l->u.s.current && l->u.s.readers_active == 0); } /* @@ -389,7 +406,7 @@ __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_RWLOCK new, old; do { - new.u.v = old.u.v = l->u.v; + old.u.v = l->u.v; /* * We're holding the lock exclusive, there shouldn't be any @@ -404,6 +421,7 @@ __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) * to active: this could race with new readlock requests, so we * have to spin. 
*/ + new.u.v = old.u.v; if (++new.u.s.current == new.u.s.reader) { new.u.s.readers_active = new.u.s.readers_queued; new.u.s.readers_queued = 0; @@ -426,8 +444,11 @@ __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) bool __wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *l) { + WT_RWLOCK old; + WT_UNUSED(session); - return (l->u.s.current != l->u.s.next || l->u.s.readers_active != 0); + old.u.v = l->u.v; + return (old.u.s.current != old.u.s.next || old.u.s.readers_active != 0); } #endif -- cgit v1.2.1 From 683b5d097b955d24903a2b527ad085a1488e3d3f Mon Sep 17 00:00:00 2001 From: David Hows Date: Wed, 7 Jun 2017 16:39:14 +1000 Subject: WT-3332 Add per-connection stats for update transaction conflicts (#3448) --- dist/stat_data.py | 1 + src/include/stat.h | 1 + src/include/txn.i | 2 ++ src/include/wiredtiger.in | 2 ++ src/support/stat.c | 3 +++ 5 files changed, 9 insertions(+) diff --git a/dist/stat_data.py b/dist/stat_data.py index acc156b947e..0850853fa3b 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -436,6 +436,7 @@ connection_stats = [ TxnStat('txn_snapshots_created', 'number of named snapshots created'), TxnStat('txn_snapshots_dropped', 'number of named snapshots dropped'), TxnStat('txn_sync', 'transaction sync calls'), + TxnStat('txn_update_conflict', 'update conflicts'), ########################################## # Yield statistics diff --git a/src/include/stat.h b/src/include/stat.h index 7c2529f1746..cbd2bb9af5e 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -513,6 +513,7 @@ struct __wt_connection_stats { int64_t txn_sync; int64_t txn_commit; int64_t txn_rollback; + int64_t txn_update_conflict; }; /* diff --git a/src/include/txn.i b/src/include/txn.i index f7321af5b12..f4f571cb67e 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -424,6 +424,8 @@ __wt_txn_update_check(WT_SESSION_IMPL *session, WT_UPDATE *upd) if (txn->isolation == WT_ISO_SNAPSHOT) while (upd != NULL && !__wt_txn_visible(session, upd->txnid)) { if 
(upd->txnid != WT_TXN_ABORTED) { + WT_STAT_CONN_INCR( + session, txn_update_conflict); WT_STAT_DATA_INCR( session, txn_update_conflict); return (WT_ROLLBACK); diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 2bbe812d7f7..03ce35313c5 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -4986,6 +4986,8 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_TXN_COMMIT 1254 /*! transaction: transactions rolled back */ #define WT_STAT_CONN_TXN_ROLLBACK 1255 +/*! transaction: update conflicts */ +#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1256 /*! * @} diff --git a/src/support/stat.c b/src/support/stat.c index 061615c0931..dcad5351fe8 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -888,6 +888,7 @@ static const char * const __stats_connection_desc[] = { "transaction: transaction sync calls", "transaction: transactions committed", "transaction: transactions rolled back", + "transaction: update conflicts", }; int @@ -1186,6 +1187,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->txn_sync = 0; stats->txn_commit = 0; stats->txn_rollback = 0; + stats->txn_update_conflict = 0; } void @@ -1563,6 +1565,7 @@ __wt_stat_connection_aggregate( to->txn_sync += WT_STAT_READ(from, txn_sync); to->txn_commit += WT_STAT_READ(from, txn_commit); to->txn_rollback += WT_STAT_READ(from, txn_rollback); + to->txn_update_conflict += WT_STAT_READ(from, txn_update_conflict); } static const char * const __stats_join_desc[] = { -- cgit v1.2.1 From 6a2b4ced47d225ee40b1e37a5d85964c49d80389 Mon Sep 17 00:00:00 2001 From: David Hows Date: Wed, 7 Jun 2017 17:05:03 +1000 Subject: WT-3169 Add verbose messages regarding lookaside file usage (#3425) * Verbose messages for lookaside activity are generated once-per-checkpoint. 
--- dist/api_data.py | 1 + dist/flags.py | 1 + src/btree/bt_read.c | 48 ++++++++++++++++++++++++++++++++++++--- src/cache/cache_las.c | 12 ++++------ src/config/config_def.c | 55 ++++++++++++++++++++++++-------------------- src/conn/conn_api.c | 1 + src/include/connection.h | 10 +++++++- src/include/flags.h | 39 +++++++++++++++---------------- src/include/wiredtiger.in | 24 ++++++++++---------- src/reconcile/rec_write.c | 58 +++++++++++++++++++++++++++++++++++++++++++---- 10 files changed, 178 insertions(+), 71 deletions(-) diff --git a/dist/api_data.py b/dist/api_data.py index 22600dd5e29..3297c68147a 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -529,6 +529,7 @@ connection_runtime_config = [ 'fileops', 'handleops', 'log', + 'lookaside_activity', 'lsm', 'lsm_manager', 'metadata', diff --git a/dist/flags.py b/dist/flags.py index d80c80a37ce..8edabd69648 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -67,6 +67,7 @@ flags = { 'VERB_FILEOPS', 'VERB_HANDLEOPS', 'VERB_LOG', + 'VERB_LOOKASIDE', 'VERB_LSM', 'VERB_LSM_MANAGER', 'VERB_METADATA', diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index de84a711019..3f85e58f088 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -8,6 +8,8 @@ #include "wt_internal.h" +static void __btree_verbose_lookaside_read(WT_SESSION_IMPL *); + /* * __wt_las_remove_block -- * Remove all records matching a key prefix from the lookaside store. 
@@ -19,8 +21,7 @@ __wt_las_remove_block(WT_SESSION_IMPL *session, WT_DECL_ITEM(las_addr); WT_DECL_ITEM(las_key); WT_DECL_RET; - uint64_t las_counter, las_txnid; - int64_t remove_cnt; + uint64_t las_counter, las_txnid, remove_cnt; uint32_t las_id; int exact; @@ -74,7 +75,7 @@ err: __wt_scr_free(session, &las_addr); if (remove_cnt > S2C(session)->las_record_cnt) S2C(session)->las_record_cnt = 0; else if (remove_cnt > 0) - (void)__wt_atomic_subi64( + (void)__wt_atomic_sub64( &S2C(session)->las_record_cnt, remove_cnt); return (ret); @@ -451,6 +452,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref) */ dsk = tmp.data; if (F_ISSET(dsk, WT_PAGE_LAS_UPDATE) && __wt_las_is_written(session)) { + __btree_verbose_lookaside_read(session); WT_STAT_CONN_INCR(session, cache_read_lookaside); WT_STAT_DATA_INCR(session, cache_read_lookaside); @@ -680,3 +682,43 @@ skip_evict: __wt_sleep(0, sleep_cnt); } } + +/* + * __btree_verbose_lookaside_read -- + * Create a verbose message to display at most once per checkpoint when + * performing a lookaside table read. + */ +static void +__btree_verbose_lookaside_read(WT_SESSION_IMPL *session) +{ +#ifdef HAVE_VERBOSE + WT_CONNECTION_IMPL *conn; + uint64_t ckpt_gen_current, ckpt_gen_last; + + if (!WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE)) return; + + conn = S2C(session); + ckpt_gen_current = __wt_gen(session, WT_GEN_CHECKPOINT); + ckpt_gen_last = conn->las_verb_gen_read; + + /* + * This message is throttled to one per checkpoint. To do this we + * track the generation of the last checkpoint for which the message + * was printed and check against the current checkpoint generation. + */ + if (ckpt_gen_current > ckpt_gen_last) { + /* + * Attempt to atomically replace the last checkpoint generation + * for which this message was printed. If the atomic swap fails + * we have raced and the winning thread will print the message. 
+ */ + if (__wt_atomic_casv64(&conn->las_verb_gen_read, + ckpt_gen_last, ckpt_gen_current)) { + __wt_verbose(session, WT_VERB_LOOKASIDE, + "Read from lookaside file triggered."); + } + } +#else + WT_UNUSED(session); +#endif +} diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c index 06c6354148c..a2233514223 100644 --- a/src/cache/cache_las.c +++ b/src/cache/cache_las.c @@ -292,8 +292,7 @@ __wt_las_sweep(WT_SESSION_IMPL *session) WT_DECL_ITEM(las_key); WT_DECL_RET; WT_ITEM *key; - uint64_t cnt, las_counter, las_txnid; - int64_t remove_cnt; + uint64_t cnt, las_counter, las_txnid, remove_cnt; uint32_t las_id, session_flags; int notused; @@ -342,7 +341,7 @@ __wt_las_sweep(WT_SESSION_IMPL *session) * blocks in the cache in order to get rid of them, and slowly review * lookaside blocks that have already been evicted. */ - cnt = (uint64_t)WT_MAX(100, conn->las_record_cnt / 30); + cnt = WT_MAX(100, conn->las_record_cnt / 30); /* Discard pages we read as soon as we're done with them. */ F_SET(session, WT_SESSION_NO_CACHE); @@ -390,14 +389,13 @@ err: __wt_buf_free(session, key); WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags)); /* - * If there were races to remove records, we can over-count. All - * arithmetic is signed, so underflow isn't fatal, but check anyway so - * we don't skew low over time. + * If there were races to remove records, we can over-count. Underflow + * isn't fatal, but check anyway so we don't skew low over time. 
*/ if (remove_cnt > conn->las_record_cnt) conn->las_record_cnt = 0; else if (remove_cnt > 0) - (void)__wt_atomic_subi64(&conn->las_record_cnt, remove_cnt); + (void)__wt_atomic_sub64(&conn->las_record_cnt, remove_cnt); F_CLR(session, WT_SESSION_NO_CACHE); diff --git a/src/config/config_def.c b/src/config/config_def.c index f152fbacad4..a7397d21c6a 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -148,11 +148,12 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," - "\"handleops\",\"log\",\"lsm\",\"lsm_manager\",\"metadata\"," - "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," - "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," - "\"split\",\"temporary\",\"thread_group\",\"transaction\"," - "\"verify\",\"version\",\"write\"]", + "\"handleops\",\"log\",\"lookaside_activity\",\"lsm\"," + "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," + "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," + "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," + "\"thread_group\",\"transaction\",\"verify\",\"version\"," + "\"write\"]", NULL, 0 }, { NULL, NULL, NULL, NULL, NULL, 0 } }; @@ -751,11 +752,12 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," - "\"handleops\",\"log\",\"lsm\",\"lsm_manager\",\"metadata\"," - "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," - "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," - "\"split\",\"temporary\",\"thread_group\",\"transaction\"," - "\"verify\",\"version\",\"write\"]", + "\"handleops\",\"log\",\"lookaside_activity\",\"lsm\"," + "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," + 
"\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," + "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," + "\"thread_group\",\"transaction\",\"verify\",\"version\"," + "\"write\"]", NULL, 0 }, { "write_through", "list", NULL, "choices=[\"data\",\"log\"]", @@ -838,11 +840,12 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," - "\"handleops\",\"log\",\"lsm\",\"lsm_manager\",\"metadata\"," - "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," - "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," - "\"split\",\"temporary\",\"thread_group\",\"transaction\"," - "\"verify\",\"version\",\"write\"]", + "\"handleops\",\"log\",\"lookaside_activity\",\"lsm\"," + "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," + "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," + "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," + "\"thread_group\",\"transaction\",\"verify\",\"version\"," + "\"write\"]", NULL, 0 }, { "version", "string", NULL, NULL, NULL, 0 }, { "write_through", "list", @@ -920,11 +923,12 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," - "\"handleops\",\"log\",\"lsm\",\"lsm_manager\",\"metadata\"," - "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," - "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," - "\"split\",\"temporary\",\"thread_group\",\"transaction\"," - "\"verify\",\"version\",\"write\"]", + "\"handleops\",\"log\",\"lookaside_activity\",\"lsm\"," + "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," + "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," + "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," + 
"\"thread_group\",\"transaction\",\"verify\",\"version\"," + "\"write\"]", NULL, 0 }, { "version", "string", NULL, NULL, NULL, 0 }, { "write_through", "list", @@ -1002,11 +1006,12 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," "\"evict\",\"evict_stuck\",\"evictserver\",\"fileops\"," - "\"handleops\",\"log\",\"lsm\",\"lsm_manager\",\"metadata\"," - "\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\"," - "\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\"," - "\"split\",\"temporary\",\"thread_group\",\"transaction\"," - "\"verify\",\"version\",\"write\"]", + "\"handleops\",\"log\",\"lookaside_activity\",\"lsm\"," + "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," + "\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\"," + "\"salvage\",\"shared_cache\",\"split\",\"temporary\"," + "\"thread_group\",\"transaction\",\"verify\",\"version\"," + "\"write\"]", NULL, 0 }, { "write_through", "list", NULL, "choices=[\"data\",\"log\"]", diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index c0a1f5c0920..70e96aa8473 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1803,6 +1803,7 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) { "fileops", WT_VERB_FILEOPS }, { "handleops", WT_VERB_HANDLEOPS }, { "log", WT_VERB_LOG }, + { "lookaside_activity", WT_VERB_LOOKASIDE }, { "lsm", WT_VERB_LSM }, { "lsm_manager", WT_VERB_LSM_MANAGER }, { "metadata", WT_VERB_METADATA }, diff --git a/src/include/connection.h b/src/include/connection.h index bf2f8a2c7e1..56d801cd361 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -360,7 +360,15 @@ struct __wt_connection_impl { bool las_written; /* Lookaside table has been written */ WT_ITEM las_sweep_key; /* Sweep server's saved key */ - int64_t las_record_cnt;/* Count of lookaside records */ + uint64_t las_record_cnt;/* Count of lookaside 
records */ + + /* + * The "lookaside_activity" verbose messages are throttled to once per + * checkpoint. To accomplish this we track the checkpoint generation + * for the most recent read and write verbose messages. + */ + volatile uint64_t las_verb_gen_read; + volatile uint64_t las_verb_gen_write; /* Locked: collator list */ TAILQ_HEAD(__wt_coll_qh, __wt_named_collator) collqh; diff --git a/src/include/flags.h b/src/include/flags.h index d7c0e0f9472..919c0dd2f98 100644 --- a/src/include/flags.h +++ b/src/include/flags.h @@ -95,25 +95,26 @@ #define WT_VERB_FILEOPS 0x00000080 #define WT_VERB_HANDLEOPS 0x00000100 #define WT_VERB_LOG 0x00000200 -#define WT_VERB_LSM 0x00000400 -#define WT_VERB_LSM_MANAGER 0x00000800 -#define WT_VERB_METADATA 0x00001000 -#define WT_VERB_MUTEX 0x00002000 -#define WT_VERB_OVERFLOW 0x00004000 -#define WT_VERB_READ 0x00008000 -#define WT_VERB_REBALANCE 0x00010000 -#define WT_VERB_RECONCILE 0x00020000 -#define WT_VERB_RECOVERY 0x00040000 -#define WT_VERB_RECOVERY_PROGRESS 0x00080000 -#define WT_VERB_SALVAGE 0x00100000 -#define WT_VERB_SHARED_CACHE 0x00200000 -#define WT_VERB_SPLIT 0x00400000 -#define WT_VERB_TEMPORARY 0x00800000 -#define WT_VERB_THREAD_GROUP 0x01000000 -#define WT_VERB_TRANSACTION 0x02000000 -#define WT_VERB_VERIFY 0x04000000 -#define WT_VERB_VERSION 0x08000000 -#define WT_VERB_WRITE 0x10000000 +#define WT_VERB_LOOKASIDE 0x00000400 +#define WT_VERB_LSM 0x00000800 +#define WT_VERB_LSM_MANAGER 0x00001000 +#define WT_VERB_METADATA 0x00002000 +#define WT_VERB_MUTEX 0x00004000 +#define WT_VERB_OVERFLOW 0x00008000 +#define WT_VERB_READ 0x00010000 +#define WT_VERB_REBALANCE 0x00020000 +#define WT_VERB_RECONCILE 0x00040000 +#define WT_VERB_RECOVERY 0x00080000 +#define WT_VERB_RECOVERY_PROGRESS 0x00100000 +#define WT_VERB_SALVAGE 0x00200000 +#define WT_VERB_SHARED_CACHE 0x00400000 +#define WT_VERB_SPLIT 0x00800000 +#define WT_VERB_TEMPORARY 0x01000000 +#define WT_VERB_THREAD_GROUP 0x02000000 +#define WT_VERB_TRANSACTION 0x04000000 
+#define WT_VERB_VERIFY 0x08000000 +#define WT_VERB_VERSION 0x10000000 +#define WT_VERB_WRITE 0x20000000 #define WT_VISIBILITY_ERR 0x00000080 /* * flags section: END diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 03ce35313c5..28e6da2d6d9 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -2087,12 +2087,12 @@ struct __wt_connection { * list\, with values chosen from the following options: \c "api"\, \c * "block"\, \c "checkpoint"\, \c "compact"\, \c "evict"\, \c * "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c "handleops"\, \c - * "log"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c - * "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c - * "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c - * "shared_cache"\, \c "split"\, \c "temporary"\, \c "thread_group"\, \c - * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default - * empty.} + * "log"\, \c "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c + * "metadata"\, \c "mutex"\, \c "overflow"\, \c "read"\, \c + * "rebalance"\, \c "reconcile"\, \c "recovery"\, \c + * "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, + * \c "temporary"\, \c "thread_group"\, \c "transaction"\, \c "verify"\, + * \c "version"\, \c "write"; default empty.} * @configend * @errors */ @@ -2619,12 +2619,12 @@ struct __wt_connection { * list\, such as "verbose=[evictserver\,read]"., a list\, with * values chosen from the following options: \c "api"\, \c "block"\, \c * "checkpoint"\, \c "compact"\, \c "evict"\, \c "evict_stuck"\, \c - * "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c "lsm"\, \c - * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c "read"\, \c - * "rebalance"\, \c "reconcile"\, \c "recovery"\, \c "recovery_progress"\, \c - * "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c - * "thread_group"\, \c "transaction"\, \c "verify"\, \c "version"\, \c "write"; - * default 
empty.} + * "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c + * "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c + * "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c + * "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c + * "split"\, \c "temporary"\, \c "thread_group"\, \c "transaction"\, \c + * "verify"\, \c "version"\, \c "write"; default empty.} * @config{write_through, Use \c FILE_FLAG_WRITE_THROUGH on Windows to write to * files. Ignored on non-Windows systems. Options are given as a list\, such * as "write_through=[data]". Configuring \c write_through requires diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 8bff4c630c0..f7df73c4ecb 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -351,6 +351,7 @@ static int __rec_dictionary_init(WT_SESSION_IMPL *, WT_RECONCILE *, u_int); static int __rec_dictionary_lookup( WT_SESSION_IMPL *, WT_RECONCILE *, WT_KV *, WT_DICTIONARY **); static void __rec_dictionary_reset(WT_RECONCILE *); +static void __rec_verbose_lookaside_write(WT_SESSION_IMPL *); /* * __wt_reconcile -- @@ -3567,8 +3568,7 @@ __rec_update_las(WT_SESSION_IMPL *session, WT_PAGE *page; WT_SAVE_UPD *list; WT_UPDATE *upd; - uint64_t las_counter; - int64_t insert_cnt; + uint64_t insert_cnt, las_counter; uint32_t i, session_flags, slot; uint8_t *p; @@ -3683,9 +3683,11 @@ __rec_update_las(WT_SESSION_IMPL *session, err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags)); - if (insert_cnt > 0) - (void)__wt_atomic_addi64( + if (insert_cnt > 0) { + (void)__wt_atomic_add64( &S2C(session)->las_record_cnt, insert_cnt); + __rec_verbose_lookaside_write(session); + } __wt_scr_free(session, &key); return (ret); @@ -6577,3 +6579,51 @@ __rec_dictionary_lookup( *dpp = next; return (0); } + +/* + * __rec_verbose_lookaside_write -- + * Create a verbose message to display once per checkpoint with details + * about the cache state when 
performing a lookaside table write. + */ +static void +__rec_verbose_lookaside_write(WT_SESSION_IMPL *session) +{ +#ifdef HAVE_VERBOSE + WT_CONNECTION_IMPL *conn; + uint64_t ckpt_gen_current, ckpt_gen_last; + uint32_t pct_dirty, pct_full; + + if (!WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE)) return; + + conn = S2C(session); + ckpt_gen_current = __wt_gen(session, WT_GEN_CHECKPOINT); + ckpt_gen_last = conn->las_verb_gen_write; + + /* + * This message is throttled to one per checkpoint. To do this we + * track the generation of the last checkpoint for which the message + * was printed and check against the current checkpoint generation. + */ + if (ckpt_gen_current > ckpt_gen_last) { + /* + * Attempt to atomically replace the last checkpoint generation + * for which this message was printed. If the atomic swap fails + * we have raced and the winning thread will print the message. + */ + if (__wt_atomic_casv64(&conn->las_verb_gen_write, + ckpt_gen_last, ckpt_gen_current)) { + (void)__wt_eviction_clean_needed(session, &pct_full); + (void)__wt_eviction_dirty_needed(session, &pct_dirty); + + __wt_verbose(session, WT_VERB_LOOKASIDE, + "Page reconciliation triggered lookaside write. 
" + "Entries now in lookaside file: %" PRIu64 ", " + "cache dirty: %" PRIu32 "%% , " + "cache use: %" PRIu32 "%%", + conn->las_record_cnt, pct_dirty, pct_full); + } + } +#else + WT_UNUSED(session); +#endif +} -- cgit v1.2.1 From be80ea7fbcb49d4c6d5531004cbbe0987eb68c76 Mon Sep 17 00:00:00 2001 From: Sulabh Mahajan Date: Wed, 7 Jun 2017 17:45:56 +1000 Subject: WT-3349 Add rwlock statistics (#3457) --- dist/s_stat | 7 +- dist/s_string.ok | 1 + dist/stat_data.py | 8 +- src/conn/conn_handle.c | 4 +- src/evict/evict_lru.c | 23 +---- src/include/mutex.h | 24 +++++ src/include/stat.h | 8 +- src/include/wiredtiger.in | 248 ++++++++++++++++++++++++---------------------- src/support/mtx_rw.c | 48 +++++++++ src/support/stat.c | 31 ++++-- 10 files changed, 249 insertions(+), 153 deletions(-) diff --git a/dist/s_stat b/dist/s_stat index 6aeeca6faa6..cf9303e5f95 100755 --- a/dist/s_stat +++ b/dist/s_stat @@ -25,15 +25,20 @@ cat << UNUSED_STAT_FIELDS lock_checkpoint_count lock_checkpoint_wait_application lock_checkpoint_wait_internal +lock_dhandle_read_count +lock_dhandle_wait_application +lock_dhandle_wait_internal +lock_dhandle_write_count lock_metadata_count lock_metadata_wait_application lock_metadata_wait_internal lock_schema_count lock_schema_wait_application lock_schema_wait_internal -lock_table_count +lock_table_read_count lock_table_wait_application lock_table_wait_internal +lock_table_write_count UNUSED_STAT_FIELDS echo "$search" diff --git a/dist/s_string.ok b/dist/s_string.ok index d5a562fcbd1..4ddb64297f4 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -305,6 +305,7 @@ RMW RNG RPC RUNDIR +RWLOCK RXB Radu ReadFile diff --git a/dist/stat_data.py b/dist/stat_data.py index 0850853fa3b..7b919848003 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -295,16 +295,20 @@ connection_stats = [ LockStat('lock_checkpoint_count', 'checkpoint lock acquisitions'), LockStat('lock_checkpoint_wait_application', 'checkpoint lock application thread wait time (usecs)'), 
LockStat('lock_checkpoint_wait_internal', 'checkpoint lock internal thread wait time (usecs)'), - LockStat('lock_handle_list_wait_eviction', 'handle-list lock eviction thread wait time (usecs)'), + LockStat('lock_dhandle_read_count', 'dhandle read lock acquisitions'), + LockStat('lock_dhandle_wait_application', 'dhandle lock application thread time waiting for the dhandle lock (usecs)'), + LockStat('lock_dhandle_wait_internal', 'dhandle lock internal thread time waiting for the dhandle lock (usecs)'), + LockStat('lock_dhandle_write_count', 'dhandle write lock acquisitions'), LockStat('lock_metadata_count', 'metadata lock acquisitions'), LockStat('lock_metadata_wait_application', 'metadata lock application thread wait time (usecs)'), LockStat('lock_metadata_wait_internal', 'metadata lock internal thread wait time (usecs)'), LockStat('lock_schema_count', 'schema lock acquisitions'), LockStat('lock_schema_wait_application', 'schema lock application thread wait time (usecs)'), LockStat('lock_schema_wait_internal', 'schema lock internal thread wait time (usecs)'), - LockStat('lock_table_count', 'table lock acquisitions'), + LockStat('lock_table_read_count', 'table read lock acquisitions'), LockStat('lock_table_wait_application', 'table lock application thread time waiting for the table lock (usecs)'), LockStat('lock_table_wait_internal', 'table lock internal thread time waiting for the table lock (usecs)'), + LockStat('lock_table_write_count', 'table write lock acquisitions'), ########################################## # Logging statistics diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index 32a0d80c1f3..2f3f9488b58 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -62,9 +62,9 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_RET(__wt_spin_init(session, &conn->turtle_lock, "turtle file")); /* Read-write locks */ - WT_RET(__wt_rwlock_init(session, &conn->dhandle_lock)); + WT_RWLOCK_INIT_TRACKED(session, &conn->dhandle_lock, dhandle); 
WT_RET(__wt_rwlock_init(session, &conn->hot_backup_lock)); - WT_RET(__wt_rwlock_init(session, &conn->table_lock)); + WT_RWLOCK_INIT_TRACKED(session, &conn->table_lock, table); /* Setup the spin locks for the LSM manager queues. */ WT_RET(__wt_spin_init(session, diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index b5dd3837531..46291eb63de 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -31,27 +31,16 @@ static int __evict_walk_file( static int __evict_lock_handle_list(WT_SESSION_IMPL *session) { - struct timespec enter, leave; WT_CACHE *cache; WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_RWLOCK *dh_lock; u_int spins; - bool dh_stats; conn = S2C(session); cache = conn->cache; dh_lock = &conn->dhandle_lock; - /* - * Setup tracking of handle lock acquisition wait time if statistics - * are enabled. - */ - dh_stats = WT_STAT_ENABLED(session); - - if (dh_stats) - __wt_epoch(session, &enter); - /* * Use a custom lock acquisition back off loop so the eviction server * notices any interrupt quickly. @@ -64,17 +53,7 @@ __evict_lock_handle_list(WT_SESSION_IMPL *session) else __wt_sleep(0, WT_THOUSAND); } - /* - * Only record statistics on success. 
- */ - WT_RET(ret); - if (dh_stats) { - __wt_epoch(session, &leave); - WT_STAT_CONN_INCRV( - session, lock_handle_list_wait_eviction, - (int64_t)WT_TIMEDIFF_US(leave, enter)); - } - return (0); + return (ret); } /* diff --git a/src/include/mutex.h b/src/include/mutex.h index 5f814c2799e..7aeb6160f43 100644 --- a/src/include/mutex.h +++ b/src/include/mutex.h @@ -50,10 +50,34 @@ struct __wt_rwlock { /* Read/write lock */ } s; } u; + int16_t stat_read_count_off; /* read acquisitions offset */ + int16_t stat_write_count_off; /* write acquisitions offset */ + int16_t stat_app_usecs_off; /* waiting application threads offset */ + int16_t stat_int_usecs_off; /* waiting server threads offset */ + WT_CONDVAR *cond_readers; /* Blocking readers */ WT_CONDVAR *cond_writers; /* Blocking writers */ }; +/* + * WT_RWLOCK_INIT_TRACKED -- + * Read write lock initialization, with tracking. + * + * Implemented as a macro so we can pass in a statistics field and convert + * it into a statistics structure array offset. 
+ */ +#define WT_RWLOCK_INIT_TRACKED(session, l, name) do { \ + WT_RET(__wt_rwlock_init(session, l)); \ + (l)->stat_read_count_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \ + S2C(session)->stats, lock_##name##_read_count); \ + (l)->stat_write_count_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \ + S2C(session)->stats, lock_##name##_write_count); \ + (l)->stat_app_usecs_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \ + S2C(session)->stats, lock_##name##_wait_application); \ + (l)->stat_int_usecs_off = (int16_t)WT_STATS_FIELD_TO_OFFSET( \ + S2C(session)->stats, lock_##name##_wait_internal); \ +} while (0) + /* * Spin locks: * diff --git a/src/include/stat.h b/src/include/stat.h index cbd2bb9af5e..7d7d701590a 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -399,16 +399,20 @@ struct __wt_connection_stats { int64_t lock_checkpoint_count; int64_t lock_checkpoint_wait_application; int64_t lock_checkpoint_wait_internal; - int64_t lock_handle_list_wait_eviction; + int64_t lock_dhandle_wait_application; + int64_t lock_dhandle_wait_internal; + int64_t lock_dhandle_read_count; + int64_t lock_dhandle_write_count; int64_t lock_metadata_count; int64_t lock_metadata_wait_application; int64_t lock_metadata_wait_internal; int64_t lock_schema_count; int64_t lock_schema_wait_application; int64_t lock_schema_wait_internal; - int64_t lock_table_count; int64_t lock_table_wait_application; int64_t lock_table_wait_internal; + int64_t lock_table_read_count; + int64_t lock_table_write_count; int64_t log_slot_switch_busy; int64_t log_bytes_payload; int64_t log_bytes_written; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 28e6da2d6d9..cf7117376af 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -4740,254 +4740,268 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1140 /*! 
lock: checkpoint lock internal thread wait time (usecs) */ #define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1141 -/*! lock: handle-list lock eviction thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_HANDLE_LIST_WAIT_EVICTION 1142 +/*! + * lock: dhandle lock application thread time waiting for the dhandle + * lock (usecs) + */ +#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1142 +/*! + * lock: dhandle lock internal thread time waiting for the dhandle lock + * (usecs) + */ +#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1143 +/*! lock: dhandle read lock acquisitions */ +#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1144 +/*! lock: dhandle write lock acquisitions */ +#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1145 /*! lock: metadata lock acquisitions */ -#define WT_STAT_CONN_LOCK_METADATA_COUNT 1143 +#define WT_STAT_CONN_LOCK_METADATA_COUNT 1146 /*! lock: metadata lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1144 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1147 /*! lock: metadata lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1145 +#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1148 /*! lock: schema lock acquisitions */ -#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1146 +#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1149 /*! lock: schema lock application thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1147 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1150 /*! lock: schema lock internal thread wait time (usecs) */ -#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1148 -/*! lock: table lock acquisitions */ -#define WT_STAT_CONN_LOCK_TABLE_COUNT 1149 +#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1151 /*! * lock: table lock application thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1150 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1152 /*! 
* lock: table lock internal thread time waiting for the table lock * (usecs) */ -#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1151 +#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1153 +/*! lock: table read lock acquisitions */ +#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1154 +/*! lock: table write lock acquisitions */ +#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1155 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1152 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1156 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1153 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1157 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1154 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1158 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1155 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1159 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1156 +#define WT_STAT_CONN_LOG_FLUSH 1160 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1157 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1161 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1158 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1162 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1159 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1163 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1160 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1164 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1161 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1165 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1162 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1166 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1163 +#define WT_STAT_CONN_LOG_SCANS 1167 /*! 
log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1164 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1168 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1165 +#define WT_STAT_CONN_LOG_WRITE_LSN 1169 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1166 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1170 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1167 +#define WT_STAT_CONN_LOG_SYNC 1171 /*! log: log sync time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DURATION 1168 +#define WT_STAT_CONN_LOG_SYNC_DURATION 1172 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1169 +#define WT_STAT_CONN_LOG_SYNC_DIR 1173 /*! log: log sync_dir time duration (usecs) */ -#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1170 +#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1174 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1171 +#define WT_STAT_CONN_LOG_WRITES 1175 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1172 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1176 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1173 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1177 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1174 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1178 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1175 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1179 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1176 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1180 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1177 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1181 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1178 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1182 /*! 
log: slot close lost race */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1179 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1183 /*! log: slot close unbuffered waits */ -#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1180 +#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1184 /*! log: slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1181 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1185 /*! log: slot join atomic update races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1182 +#define WT_STAT_CONN_LOG_SLOT_RACES 1186 /*! log: slot join calls atomic updates raced */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1183 +#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1187 /*! log: slot join calls did not yield */ -#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1184 +#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1188 /*! log: slot join calls found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1185 +#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1189 /*! log: slot join calls slept */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1186 +#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1190 /*! log: slot join calls yielded */ -#define WT_STAT_CONN_LOG_SLOT_YIELD 1187 +#define WT_STAT_CONN_LOG_SLOT_YIELD 1191 /*! log: slot join found active slot closed */ -#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1188 +#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1192 /*! log: slot joins yield time (usecs) */ -#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1189 +#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1193 /*! log: slot transitions unable to find free slot */ -#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1190 +#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1194 /*! log: slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1191 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1195 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1192 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1196 /*! 
log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1193 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1197 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1194 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1198 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1195 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1199 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1196 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1200 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1197 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1201 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1198 +#define WT_STAT_CONN_REC_PAGES 1202 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1199 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1203 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1200 +#define WT_STAT_CONN_REC_PAGE_DELETE 1204 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1201 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1205 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1202 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1206 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1203 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1207 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1204 +#define WT_STAT_CONN_SESSION_OPEN 1208 /*! session: table alter failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1205 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1209 /*! session: table alter successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1206 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1210 /*! 
session: table alter unchanged and skipped */ -#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1207 +#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1211 /*! session: table compact failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1208 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1212 /*! session: table compact successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1209 +#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1213 /*! session: table create failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1210 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1214 /*! session: table create successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1211 +#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1215 /*! session: table drop failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1212 +#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1216 /*! session: table drop successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1213 +#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1217 /*! session: table rebalance failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1214 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1218 /*! session: table rebalance successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1215 +#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1219 /*! session: table rename failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1216 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1220 /*! session: table rename successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1217 +#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1221 /*! session: table salvage failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1218 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1222 /*! 
session: table salvage successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1219 +#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1223 /*! session: table truncate failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1220 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1224 /*! session: table truncate successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1221 +#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1225 /*! session: table verify failed calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1222 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1226 /*! session: table verify successful calls */ -#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1223 +#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1227 /*! thread-state: active filesystem fsync calls */ -#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1224 +#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1228 /*! thread-state: active filesystem read calls */ -#define WT_STAT_CONN_THREAD_READ_ACTIVE 1225 +#define WT_STAT_CONN_THREAD_READ_ACTIVE 1229 /*! thread-state: active filesystem write calls */ -#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1226 +#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1230 /*! thread-yield: application thread time evicting (usecs) */ -#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1227 +#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1231 /*! thread-yield: application thread time waiting for cache (usecs) */ -#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1228 +#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1232 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1229 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1233 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1230 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1234 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1231 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1235 /*! 
thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1232 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1236 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1233 +#define WT_STAT_CONN_PAGE_SLEEP 1237 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1234 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1238 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1235 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1239 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1236 +#define WT_STAT_CONN_TXN_BEGIN 1240 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1237 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1241 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1238 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1242 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1239 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1243 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1240 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1244 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1241 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1245 /*! transaction: transaction checkpoint scrub dirty target */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1242 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1246 /*! transaction: transaction checkpoint scrub time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1243 +#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1247 /*! 
transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1244 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1248 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1245 +#define WT_STAT_CONN_TXN_CHECKPOINT 1249 /*! * transaction: transaction checkpoints skipped because database was * clean */ -#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1246 +#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1250 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1247 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1251 /*! * transaction: transaction fsync calls for checkpoint after allocating * the transaction ID */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1248 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1252 /*! * transaction: transaction fsync duration for checkpoint after * allocating the transaction ID (usecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1249 +#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1253 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1250 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1254 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1251 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1255 /*! * transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1252 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1256 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1253 +#define WT_STAT_CONN_TXN_SYNC 1257 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1254 +#define WT_STAT_CONN_TXN_COMMIT 1258 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1255 +#define WT_STAT_CONN_TXN_ROLLBACK 1259 /*! 
transaction: update conflicts */ -#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1256 +#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1260 /*! * @} diff --git a/src/support/mtx_rw.c b/src/support/mtx_rw.c index 2354ad4f4cc..eeb9c6b72a2 100644 --- a/src/support/mtx_rw.c +++ b/src/support/mtx_rw.c @@ -91,6 +91,8 @@ int __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l) { l->u.v = 0; + l->stat_read_count_off = l->stat_write_count_off = -1; + l->stat_app_usecs_off = l->stat_int_usecs_off = -1; WT_RET(__wt_cond_alloc(session, "rwlock wait", &l->cond_readers)); WT_RET(__wt_cond_alloc(session, "rwlock wait", &l->cond_writers)); @@ -118,8 +120,13 @@ int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) { WT_RWLOCK new, old; + int64_t **stats; WT_STAT_CONN_INCR(session, rwlock_read); + if (l->stat_read_count_off != -1 && WT_STAT_ENABLED(session)) { + stats = (int64_t **)S2C(session)->stats; + stats[session->stat_bucket][l->stat_read_count_off]++; + } old.u.v = l->u.v; @@ -159,11 +166,18 @@ void __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l) { WT_RWLOCK new, old; + struct timespec enter, leave; + int64_t **stats; int pause_cnt; int16_t writers_active; uint8_t ticket; + bool set_stats; WT_STAT_CONN_INCR(session, rwlock_read); + stats = (int64_t **)S2C(session)->stats; + set_stats = (l->stat_read_count_off != -1 && WT_STAT_ENABLED(session)); + if (set_stats) + stats[session->stat_bucket][l->stat_read_count_off]++; WT_DIAGNOSTIC_YIELD; @@ -221,6 +235,8 @@ stall: __wt_cond_wait(session, break; } + if (set_stats) + __wt_epoch(session, &enter); /* Wait for our group to start. 
*/ for (pause_cnt = 0; ticket != l->u.s.current; pause_cnt++) { if (pause_cnt < 1000) @@ -234,6 +250,15 @@ stall: __wt_cond_wait(session, l->cond_readers, 10 * WT_THOUSAND, __read_blocked); } } + if (set_stats) { + __wt_epoch(session, &leave); + if (F_ISSET(session, WT_SESSION_INTERNAL)) + stats[session->stat_bucket][l->stat_int_usecs_off] += + (int64_t)WT_TIMEDIFF_US(leave, enter); + else + stats[session->stat_bucket][l->stat_app_usecs_off] += + (int64_t)WT_TIMEDIFF_US(leave, enter); + } /* * Applications depend on a barrier here so that operations holding the @@ -282,8 +307,13 @@ int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) { WT_RWLOCK new, old; + int64_t **stats; WT_STAT_CONN_INCR(session, rwlock_write); + if (l->stat_write_count_off != -1 && WT_STAT_ENABLED(session)) { + stats = (int64_t **)S2C(session)->stats; + stats[session->stat_bucket][l->stat_write_count_off]++; + } /* * This write lock can only be granted if no readers or writers blocked @@ -333,10 +363,17 @@ void __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) { WT_RWLOCK new, old; + struct timespec enter, leave; + int64_t **stats; int pause_cnt; uint8_t ticket; + bool set_stats; WT_STAT_CONN_INCR(session, rwlock_write); + stats = (int64_t **)S2C(session)->stats; + set_stats = (l->stat_write_count_off != -1 && WT_STAT_ENABLED(session)); + if (set_stats) + stats[session->stat_bucket][l->stat_write_count_off]++; for (;;) { old.u.v = l->u.v; @@ -367,6 +404,8 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) * could see no readers active from a different batch and decide that * we have the lock. 
*/ + if (set_stats) + __wt_epoch(session, &enter); for (pause_cnt = 0, old.u.v = l->u.v; ticket != old.u.s.current || old.u.s.readers_active != 0; pause_cnt++, old.u.v = l->u.v) { @@ -381,6 +420,15 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) l->cond_writers, 10 * WT_THOUSAND, __write_blocked); } } + if (set_stats) { + __wt_epoch(session, &leave); + if (F_ISSET(session, WT_SESSION_INTERNAL)) + stats[session->stat_bucket][l->stat_int_usecs_off] += + (int64_t)WT_TIMEDIFF_US(leave, enter); + else + stats[session->stat_bucket][l->stat_app_usecs_off] += + (int64_t)WT_TIMEDIFF_US(leave, enter); + } /* * Applications depend on a barrier here so that operations holding the diff --git a/src/support/stat.c b/src/support/stat.c index dcad5351fe8..2dc006da827 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -774,16 +774,20 @@ static const char * const __stats_connection_desc[] = { "lock: checkpoint lock acquisitions", "lock: checkpoint lock application thread wait time (usecs)", "lock: checkpoint lock internal thread wait time (usecs)", - "lock: handle-list lock eviction thread wait time (usecs)", + "lock: dhandle lock application thread time waiting for the dhandle lock (usecs)", + "lock: dhandle lock internal thread time waiting for the dhandle lock (usecs)", + "lock: dhandle read lock acquisitions", + "lock: dhandle write lock acquisitions", "lock: metadata lock acquisitions", "lock: metadata lock application thread wait time (usecs)", "lock: metadata lock internal thread wait time (usecs)", "lock: schema lock acquisitions", "lock: schema lock application thread wait time (usecs)", "lock: schema lock internal thread wait time (usecs)", - "lock: table lock acquisitions", "lock: table lock application thread time waiting for the table lock (usecs)", "lock: table lock internal thread time waiting for the table lock (usecs)", + "lock: table read lock acquisitions", + "lock: table write lock acquisitions", "log: busy returns attempting to switch slots", 
"log: log bytes of payload data", "log: log bytes written", @@ -1073,16 +1077,20 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->lock_checkpoint_count = 0; stats->lock_checkpoint_wait_application = 0; stats->lock_checkpoint_wait_internal = 0; - stats->lock_handle_list_wait_eviction = 0; + stats->lock_dhandle_wait_application = 0; + stats->lock_dhandle_wait_internal = 0; + stats->lock_dhandle_read_count = 0; + stats->lock_dhandle_write_count = 0; stats->lock_metadata_count = 0; stats->lock_metadata_wait_application = 0; stats->lock_metadata_wait_internal = 0; stats->lock_schema_count = 0; stats->lock_schema_wait_application = 0; stats->lock_schema_wait_internal = 0; - stats->lock_table_count = 0; stats->lock_table_wait_application = 0; stats->lock_table_wait_internal = 0; + stats->lock_table_read_count = 0; + stats->lock_table_write_count = 0; stats->log_slot_switch_busy = 0; stats->log_bytes_payload = 0; stats->log_bytes_written = 0; @@ -1398,8 +1406,14 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, lock_checkpoint_wait_application); to->lock_checkpoint_wait_internal += WT_STAT_READ(from, lock_checkpoint_wait_internal); - to->lock_handle_list_wait_eviction += - WT_STAT_READ(from, lock_handle_list_wait_eviction); + to->lock_dhandle_wait_application += + WT_STAT_READ(from, lock_dhandle_wait_application); + to->lock_dhandle_wait_internal += + WT_STAT_READ(from, lock_dhandle_wait_internal); + to->lock_dhandle_read_count += + WT_STAT_READ(from, lock_dhandle_read_count); + to->lock_dhandle_write_count += + WT_STAT_READ(from, lock_dhandle_write_count); to->lock_metadata_count += WT_STAT_READ(from, lock_metadata_count); to->lock_metadata_wait_application += WT_STAT_READ(from, lock_metadata_wait_application); @@ -1410,11 +1424,14 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, lock_schema_wait_application); to->lock_schema_wait_internal += WT_STAT_READ(from, lock_schema_wait_internal); - to->lock_table_count += WT_STAT_READ(from, 
lock_table_count); to->lock_table_wait_application += WT_STAT_READ(from, lock_table_wait_application); to->lock_table_wait_internal += WT_STAT_READ(from, lock_table_wait_internal); + to->lock_table_read_count += + WT_STAT_READ(from, lock_table_read_count); + to->lock_table_write_count += + WT_STAT_READ(from, lock_table_write_count); to->log_slot_switch_busy += WT_STAT_READ(from, log_slot_switch_busy); to->log_bytes_payload += WT_STAT_READ(from, log_bytes_payload); to->log_bytes_written += WT_STAT_READ(from, log_bytes_written); -- cgit v1.2.1 From 92c2258c2f49ef9d69b430dcbf32cab89942380a Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 7 Jun 2017 10:31:07 -0400 Subject: WT-3361 Resolve Windows build warnings, build more test programs on Windows (#3456) * WT-3361 Resolve Windows build warnings, build more test programs on Windows. Update SConstruct with entries for all test programs (not all build and/or run yet). Replace all references to pthread_create, pthread_join and pthread_t with __wt_thread_create, __wt_thread_join and wt_thread_t, that is, use the WiredTiger versions for Windows portability. * Don't try and smoke-test bloom as part of the pull builds, it won't necessarily run. * Make WT's thread create / join functions visible for ex_thread. * Build all test programs with shim and wtlibs, it's simpler and allows for future changes without touching the SConstruct file. 
--- SConstruct | 84 ++++++++++++------ bench/wtperf/idle_table_cycle.c | 54 +++++------- bench/wtperf/wtperf.c | 165 +++++++++++++---------------------- bench/wtperf/wtperf.h | 6 +- examples/c/ex_thread.c | 20 ++--- src/include/extern_posix.h | 4 +- src/os_posix/os_thread.c | 2 + src/support/err.c | 11 ++- test/checkpoint/checkpointer.c | 23 ++--- test/checkpoint/test_checkpoint.c | 10 +-- test/checkpoint/test_checkpoint.h | 10 +-- test/checkpoint/workers.c | 21 ++--- test/cursor_order/cursor_order.c | 3 +- test/cursor_order/cursor_order.h | 2 +- test/cursor_order/cursor_order_ops.c | 35 +++----- test/fops/fops.c | 21 ++--- test/fops/t.c | 3 +- test/fops/thread.h | 2 +- test/format/backup.c | 6 +- test/format/compact.c | 6 +- test/format/format.h | 10 +-- test/format/lrt.c | 4 +- test/format/ops.c | 32 ++++--- test/format/util.c | 4 +- test/recovery/random-abort.c | 13 ++- test/thread/rw.c | 35 +++----- test/thread/t.c | 3 +- test/thread/thread.h | 2 +- test/windows/windows_shim.c | 23 ----- 29 files changed, 260 insertions(+), 354 deletions(-) diff --git a/SConstruct b/SConstruct index b397f662be7..22f869e02e7 100644 --- a/SConstruct +++ b/SConstruct @@ -410,41 +410,37 @@ def builder_smoke_test(target, source, env): env.Append(BUILDERS={'SmokeTest' : Builder(action = builder_smoke_test)}) #Build the tests and setup the "scons test" target - testutil = env.Library('testutil', [ 'test/utility/misc.c', 'test/utility/parse_opts.c' ]) +env.Append(CPPPATH=["test/utility"]) -#Don't test bloom on Windows, its broken t = env.Program("t_bloom", "test/bloom/test_bloom.c", - LIBS=[wtlib, testutil] + wtlibs) -#env.Alias("check", env.SmokeTest(t)) + LIBS=[wtlib, shim, testutil] + wtlibs) Default(t) -#env.Program("t_checkpoint", - #["test/checkpoint/checkpointer.c", - #"test/checkpoint/test_checkpoint.c", - #"test/checkpoint/workers.c"], - #LIBS=[wtlib]) - -t = env.Program("t_huge", - "test/huge/huge.c", - LIBS=[wtlib] + wtlibs) +t = env.Program("t_checkpoint", + 
["test/checkpoint/checkpointer.c", + "test/checkpoint/test_checkpoint.c", + "test/checkpoint/workers.c"], + LIBS=[wtlib, shim, testutil] + wtlibs) +Default(t) -#t = env.Program("t_recovery", -# "test/recovery/recovery.c", -# LIBS=[wtlib] + wtlibs) -#Default(t) +t = env.Program("t_cursor_order", + ["test/cursor_order/cursor_order.c", + "test/cursor_order/cursor_order_file.c", + "test/cursor_order/cursor_order_ops.c"], + LIBS=[wtlib, shim, testutil] + wtlibs) +Default(t) t = env.Program("t_fops", ["test/fops/file.c", "test/fops/fops.c", "test/fops/t.c"], LIBS=[wtlib, shim, testutil] + wtlibs) -env.Append(CPPPATH=["test/utility"]) Default(t) t = env.Program("t_format", @@ -459,19 +455,51 @@ t = env.Program("t_format", "test/format/t.c", "test/format/util.c", "test/format/wts.c"], - LIBS=[wtlib, shim, testutil] + wtlibs) + LIBS=[wtlib, shim, testutil] + wtlibs) +Default(t) + +t = env.Program("t_huge", + "test/huge/huge.c", + LIBS=[wtlib, shim, testutil] + wtlibs) Default(t) -#env.Program("t_thread", - #["test/thread/file.c", - #"test/thread/rw.c", - #"test/thread/stats.c", - #"test/thread/t.c"], - #LIBS=[wtlib]) +t = env.Program("t_manydbs", + "test/manydbs/manydbs.c", + LIBS=[wtlib, shim, testutil] + wtlibs) +Default(t) + +# t_readonly doesn't currently build/run. +#t = env.Program("t_readonly", +# "test/readonly/readonly.c", +# LIBS=[wtlib, shim, testutil] + wtlibs) +#Default(t) -#env.Program("t_salvage", - #["test/salvage/salvage.c"], - #LIBS=[wtlib]) +# t_random-abort doesn't currently build/run. +#t = env.Program("t_random-abort", +# "test/recovery/random-abort.c", +# LIBS=[wtlib, shim, testutil] + wtlibs) +#Default(t) + +# t_truncated-log doesn't currently build/run. +#t = env.Program("t_truncated-log", +# "test/recovery/truncated-log.c", +# LIBS=[wtlib, shim, testutil] + wtlibs) +#Default(t) + +# t_salvage-log doesn't currently build/run. 
+#t = env.Program("t_salvage", +# "test/salvage/salvage.c", +# LIBS=[wtlib, shim, testutil] + wtlibs) +#Default(t) + +# t_thread doesn't currently build/run. +#t = env.Program("t_thread", +# ["test/thread/file.c", +# "test/thread/rw.c", +# "test/thread/stats.c", +# "test/thread/t.c"], +# LIBS=[wtlib, shim, testutil] + wtlibs) +#Default(t) t = env.Program("wtperf", [ "bench/wtperf/config.c", diff --git a/bench/wtperf/idle_table_cycle.c b/bench/wtperf/idle_table_cycle.c index ce64049ce89..d0baa786ba9 100644 --- a/bench/wtperf/idle_table_cycle.c +++ b/bench/wtperf/idle_table_cycle.c @@ -57,7 +57,7 @@ check_timing(WTPERF *wtperf, * Measure how long each step takes, and flag an error if it exceeds the * configured maximum. */ -static void * +static WT_THREAD_RET cycle_idle_tables(void *arg) { struct timespec start, stop; @@ -76,7 +76,7 @@ cycle_idle_tables(void *arg) wtperf->conn, NULL, opts->sess_config, &session)) != 0) { lprintf(wtperf, ret, 0, "Error opening a session on %s", wtperf->home); - return (NULL); + return (WT_THREAD_RET_VALUE); } for (cycle_count = 0; wtperf->idle_cycle_run; ++cycle_count) { @@ -96,10 +96,10 @@ cycle_idle_tables(void *arg) lprintf(wtperf, ret, 0, "Table create failed in cycle_idle_tables."); wtperf->error = true; - return (NULL); + return (WT_THREAD_RET_VALUE); } if (check_timing(wtperf, "create", start, &stop) != 0) - return (NULL); + return (WT_THREAD_RET_VALUE); start = stop; /* Open and close cursor. 
*/ @@ -108,16 +108,16 @@ cycle_idle_tables(void *arg) lprintf(wtperf, ret, 0, "Cursor open failed in cycle_idle_tables."); wtperf->error = true; - return (NULL); + return (WT_THREAD_RET_VALUE); } if ((ret = cursor->close(cursor)) != 0) { lprintf(wtperf, ret, 0, "Cursor close failed in cycle_idle_tables."); wtperf->error = true; - return (NULL); + return (WT_THREAD_RET_VALUE); } if (check_timing(wtperf, "cursor", start, &stop) != 0) - return (NULL); + return (WT_THREAD_RET_VALUE); start = stop; #if 1 @@ -133,14 +133,14 @@ cycle_idle_tables(void *arg) lprintf(wtperf, ret, 0, "Table drop failed in cycle_idle_tables."); wtperf->error = true; - return (NULL); + return (WT_THREAD_RET_VALUE); } if (check_timing(wtperf, "drop", start, &stop) != 0) - return (NULL); + return (WT_THREAD_RET_VALUE); #endif } - return (NULL); + return (WT_THREAD_RET_VALUE); } /* @@ -150,47 +150,33 @@ cycle_idle_tables(void *arg) * structure. Should reshuffle the configuration structure so explicit static * initialization isn't necessary. 
*/ -int -start_idle_table_cycle(WTPERF *wtperf, pthread_t *idle_table_cycle_thread) +void +start_idle_table_cycle(WTPERF *wtperf, wt_thread_t *idle_table_cycle_thread) { CONFIG_OPTS *opts; - pthread_t thread_id; - int ret; + wt_thread_t thread_id; opts = wtperf->opts; if (opts->idle_table_cycle == 0) - return (0); + return; wtperf->idle_cycle_run = true; - if ((ret = pthread_create( - &thread_id, NULL, cycle_idle_tables, wtperf)) != 0) { - lprintf(wtperf, - ret, 0, "Error creating idle table cycle thread."); - wtperf->idle_cycle_run = false; - return (ret); - } + testutil_check(__wt_thread_create( + NULL, &thread_id, cycle_idle_tables, wtperf)); *idle_table_cycle_thread = thread_id; - - return (0); } -int -stop_idle_table_cycle(WTPERF *wtperf, pthread_t idle_table_cycle_thread) +void +stop_idle_table_cycle(WTPERF *wtperf, wt_thread_t idle_table_cycle_thread) { CONFIG_OPTS *opts; - int ret; opts = wtperf->opts; if (opts->idle_table_cycle == 0 || !wtperf->idle_cycle_run) - return (0); + return; wtperf->idle_cycle_run = false; - if ((ret = pthread_join(idle_table_cycle_thread, NULL)) != 0) { - lprintf( - wtperf, ret, 0, "Error joining idle table cycle thread."); - return (ret); - } - return (0); + testutil_check(__wt_thread_join(NULL, idle_table_cycle_thread)); } diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 68bc08226c2..a8d3f135280 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -32,23 +32,23 @@ #define DEFAULT_HOME "WT_TEST" #define DEFAULT_MONITOR_DIR "WT_TEST" -static void *checkpoint_worker(void *); +static WT_THREAD_RET checkpoint_worker(void *); static int drop_all_tables(WTPERF *); static int execute_populate(WTPERF *); static int execute_workload(WTPERF *); static int find_table_count(WTPERF *); -static void *monitor(void *); -static void *populate_thread(void *); +static WT_THREAD_RET monitor(void *); +static WT_THREAD_RET populate_thread(void *); static void randomize_value(WTPERF_THREAD *, char *); static void 
recreate_dir(const char *); static int start_all_runs(WTPERF *); static int start_run(WTPERF *); -static int start_threads(WTPERF *, - WORKLOAD *, WTPERF_THREAD *, u_int, void *(*)(void *)); -static int stop_threads(WTPERF *, u_int, WTPERF_THREAD *); -static void *thread_run_wtperf(void *); +static void start_threads(WTPERF *, WORKLOAD *, + WTPERF_THREAD *, u_int, WT_THREAD_CALLBACK(*)(void *)); +static void stop_threads(u_int, WTPERF_THREAD *); +static WT_THREAD_RET thread_run_wtperf(void *); static void update_value_delta(WTPERF_THREAD *); -static void *worker(void *); +static WT_THREAD_RET worker(void *); static uint64_t wtperf_rand(WTPERF_THREAD *); static uint64_t wtperf_value_range(WTPERF *); @@ -312,7 +312,7 @@ op_name(uint8_t *op) /* NOTREACHED */ } -static void * +static WT_THREAD_RET worker_async(void *arg) { CONFIG_OPTS *opts; @@ -420,7 +420,7 @@ op_err: lprintf(wtperf, ret, 0, if (0) { err: wtperf->error = wtperf->stop = true; } - return (NULL); + return (WT_THREAD_RET_VALUE); } /* @@ -513,7 +513,7 @@ err: lprintf(wtperf, ret, 0, "Pre-workload traverse error"); return (ret); } -static void * +static WT_THREAD_RET worker(void *arg) { struct timespec start, stop; @@ -893,7 +893,7 @@ err: wtperf->error = wtperf->stop = true; } free(cursors); - return (NULL); + return (WT_THREAD_RET_VALUE); } /* @@ -1014,7 +1014,7 @@ run_mix_schedule(WTPERF *wtperf, WORKLOAD *workp) return (0); } -static void * +static WT_THREAD_RET populate_thread(void *arg) { struct timespec start, stop; @@ -1163,10 +1163,10 @@ err: wtperf->error = wtperf->stop = true; } free(cursors); - return (NULL); + return (WT_THREAD_RET_VALUE); } -static void * +static WT_THREAD_RET populate_async(void *arg) { struct timespec start, stop; @@ -1261,10 +1261,10 @@ populate_async(void *arg) if (0) { err: wtperf->error = wtperf->stop = true; } - return (NULL); + return (WT_THREAD_RET_VALUE); } -static void * +static WT_THREAD_RET monitor(void *arg) { struct timespec t; @@ -1426,10 +1426,10 @@ err: 
wtperf->error = wtperf->stop = true; (void)fclose(fp); free(path); - return (NULL); + return (WT_THREAD_RET_VALUE); } -static void * +static WT_THREAD_RET checkpoint_worker(void *arg) { CONFIG_OPTS *opts; @@ -1490,7 +1490,7 @@ checkpoint_worker(void *arg) err: wtperf->error = wtperf->stop = true; } - return (NULL); + return (WT_THREAD_RET_VALUE); } static int @@ -1498,15 +1498,15 @@ execute_populate(WTPERF *wtperf) { struct timespec start, stop; CONFIG_OPTS *opts; - WTPERF_THREAD *popth; WT_ASYNC_OP *asyncop; - pthread_t idle_table_cycle_thread; + WTPERF_THREAD *popth; + WT_THREAD_CALLBACK(*pfunc)(void *); size_t i; uint64_t last_ops, msecs, print_ops_sec; uint32_t interval, tables; + wt_thread_t idle_table_cycle_thread; double print_secs; int elapsed, ret; - void *(*pfunc)(void *); opts = wtperf->opts; @@ -1516,9 +1516,7 @@ execute_populate(WTPERF *wtperf) opts->populate_threads, opts->icount); /* Start cycling idle tables if configured. */ - if ((ret = - start_idle_table_cycle(wtperf, &idle_table_cycle_thread)) != 0) - return (ret); + start_idle_table_cycle(wtperf, &idle_table_cycle_thread); wtperf->insert_key = 0; @@ -1530,9 +1528,8 @@ execute_populate(WTPERF *wtperf) pfunc = populate_async; } else pfunc = populate_thread; - if ((ret = start_threads(wtperf, NULL, - wtperf->popthreads, opts->populate_threads, pfunc)) != 0) - return (ret); + start_threads(wtperf, NULL, + wtperf->popthreads, opts->populate_threads, pfunc); __wt_epoch(NULL, &start); for (elapsed = 0, interval = 0, last_ops = 0; @@ -1568,10 +1565,8 @@ execute_populate(WTPERF *wtperf) */ popth = wtperf->popthreads; wtperf->popthreads = NULL; - ret = stop_threads(wtperf, opts->populate_threads, popth); + stop_threads(opts->populate_threads, popth); free(popth); - if (ret != 0) - return (ret); /* Report if any worker threads didn't finish. */ if (wtperf->error) { @@ -1640,8 +1635,7 @@ execute_populate(WTPERF *wtperf) } /* Stop cycling idle tables. 
*/ - if ((ret = stop_idle_table_cycle(wtperf, idle_table_cycle_thread)) != 0) - return (ret); + stop_idle_table_cycle(wtperf, idle_table_cycle_thread); return (0); } @@ -1701,13 +1695,13 @@ execute_workload(WTPERF *wtperf) WTPERF_THREAD *threads; WT_CONNECTION *conn; WT_SESSION **sessions; - pthread_t idle_table_cycle_thread; + WT_THREAD_CALLBACK(*pfunc)(void *); + wt_thread_t idle_table_cycle_thread; uint64_t last_ckpts, last_inserts, last_reads, last_truncates; uint64_t last_updates; uint32_t interval, run_ops, run_time; u_int i; - int ret, t_ret; - void *(*pfunc)(void *); + int ret; opts = wtperf->opts; @@ -1722,9 +1716,7 @@ execute_workload(WTPERF *wtperf) sessions = NULL; /* Start cycling idle tables. */ - if ((ret = - start_idle_table_cycle(wtperf, &idle_table_cycle_thread)) != 0) - return (ret); + start_idle_table_cycle(wtperf, &idle_table_cycle_thread); if (opts->warmup != 0) wtperf->in_warmup = true; @@ -1768,9 +1760,8 @@ execute_workload(WTPERF *wtperf) goto err; /* Start the workload's threads. */ - if ((ret = start_threads( - wtperf, workp, threads, (u_int)workp->threads, pfunc)) != 0) - goto err; + start_threads( + wtperf, workp, threads, (u_int)workp->threads, pfunc); threads += workp->threads; } @@ -1836,12 +1827,9 @@ execute_workload(WTPERF *wtperf) err: wtperf->stop = true; /* Stop cycling idle tables. 
*/ - if ((ret = stop_idle_table_cycle(wtperf, idle_table_cycle_thread)) != 0) - return (ret); + stop_idle_table_cycle(wtperf, idle_table_cycle_thread); - if ((t_ret = stop_threads(wtperf, - (u_int)wtperf->workers_cnt, wtperf->workers)) != 0 && ret == 0) - ret = t_ret; + stop_threads((u_int)wtperf->workers_cnt, wtperf->workers); /* Drop tables if configured to and this isn't an error path */ if (ret == 0 && @@ -2163,9 +2151,9 @@ start_all_runs(WTPERF *wtperf) { CONFIG_OPTS *opts; WTPERF *next_wtperf, **wtperfs; - pthread_t *threads; size_t i, len; - int ret, t_ret; + wt_thread_t *threads; + int ret; opts = wtperf->opts; wtperfs = NULL; @@ -2178,7 +2166,7 @@ start_all_runs(WTPERF *wtperf) wtperfs = dcalloc(opts->database_count, sizeof(WTPERF *)); /* Allocate an array to hold our thread IDs. */ - threads = dcalloc(opts->database_count, sizeof(pthread_t)); + threads = dcalloc(opts->database_count, sizeof(*threads)); for (i = 0; i < opts->database_count; i++) { wtperf_copy(wtperf, &next_wtperf); @@ -2203,22 +2191,15 @@ start_all_runs(WTPERF *wtperf) strcmp(next_wtperf->home, next_wtperf->monitor_dir) != 0) recreate_dir(next_wtperf->monitor_dir); - if ((ret = pthread_create( - &threads[i], NULL, thread_run_wtperf, next_wtperf)) != 0) { - lprintf(wtperf, ret, 0, "Error creating thread"); - goto err; - } + testutil_check(__wt_thread_create(NULL, + &threads[i], thread_run_wtperf, next_wtperf)); } /* Wait for threads to finish. 
*/ for (i = 0; i < opts->database_count; i++) - if ((t_ret = pthread_join(threads[i], NULL)) != 0) { - lprintf(wtperf, ret, 0, "Error joining thread"); - if (ret == 0) - ret = t_ret; - } + testutil_check(__wt_thread_join(NULL, threads[i])); -err: for (i = 0; i < opts->database_count && wtperfs[i] != NULL; i++) { + for (i = 0; i < opts->database_count && wtperfs[i] != NULL; i++) { wtperf_free(wtperfs[i]); free(wtperfs[i]); } @@ -2229,7 +2210,7 @@ err: for (i = 0; i < opts->database_count && wtperfs[i] != NULL; i++) { } /* Run an instance of wtperf for a given configuration. */ -static void * +static WT_THREAD_RET thread_run_wtperf(void *arg) { WTPERF *wtperf; @@ -2238,14 +2219,14 @@ thread_run_wtperf(void *arg) wtperf = (WTPERF *)arg; if ((ret = start_run(wtperf)) != 0) lprintf(wtperf, ret, 0, "Run failed for: %s.", wtperf->home); - return (NULL); + return (WT_THREAD_RET_VALUE); } static int start_run(WTPERF *wtperf) { CONFIG_OPTS *opts; - pthread_t monitor_thread; + wt_thread_t monitor_thread; uint64_t total_ops; uint32_t run_time; int monitor_created, ret, t_ret; @@ -2272,12 +2253,8 @@ start_run(WTPERF *wtperf) /* Start the monitor thread. */ if (opts->sample_interval != 0) { - if ((ret = pthread_create( - &monitor_thread, NULL, monitor, wtperf)) != 0) { - lprintf(wtperf, - ret, 0, "Error creating monitor thread."); - goto err; - } + testutil_check(__wt_thread_create( + NULL, &monitor_thread, monitor, wtperf)); monitor_created = 1; } @@ -2306,9 +2283,8 @@ start_run(WTPERF *wtperf) opts->checkpoint_threads); wtperf->ckptthreads = dcalloc( opts->checkpoint_threads, sizeof(WTPERF_THREAD)); - if (start_threads(wtperf, NULL, wtperf->ckptthreads, - opts->checkpoint_threads, checkpoint_worker) != 0) - goto err; + start_threads(wtperf, NULL, wtperf->ckptthreads, + opts->checkpoint_threads, checkpoint_worker); } if (opts->pre_load_data && (ret = pre_load_data(wtperf)) != 0) goto err; @@ -2362,16 +2338,10 @@ err: if (ret == 0) /* Notify the worker threads they are done. 
*/ wtperf->stop = true; - if ((t_ret = stop_threads(wtperf, 1, wtperf->ckptthreads)) != 0) - if (ret == 0) - ret = t_ret; + stop_threads(1, wtperf->ckptthreads); - if (monitor_created != 0 && - (t_ret = pthread_join(monitor_thread, NULL)) != 0) { - lprintf(wtperf, ret, 0, "Error joining monitor thread."); - if (ret == 0) - ret = t_ret; - } + if (monitor_created != 0) + testutil_check(__wt_thread_join(NULL, monitor_thread)); if (wtperf->conn != NULL && opts->close_conn && (t_ret = wtperf->conn->close(wtperf->conn, NULL)) != 0) { @@ -2728,14 +2698,13 @@ err: wtperf_free(wtperf); return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } -static int -start_threads(WTPERF *wtperf, - WORKLOAD *workp, WTPERF_THREAD *base, u_int num, void *(*func)(void *)) +static void +start_threads(WTPERF *wtperf, WORKLOAD *workp, + WTPERF_THREAD *base, u_int num, WT_THREAD_CALLBACK(*func)(void *)) { CONFIG_OPTS *opts; WTPERF_THREAD *thread; u_int i; - int ret; opts = wtperf->opts; @@ -2779,29 +2748,20 @@ start_threads(WTPERF *wtperf, /* Start the threads. */ for (i = 0, thread = base; i < num; ++i, ++thread) - if ((ret = pthread_create( - &thread->handle, NULL, func, thread)) != 0) { - lprintf(wtperf, ret, 0, "Error creating thread"); - return (ret); - } - - return (0); + testutil_check(__wt_thread_create( + NULL, &thread->handle, func, thread)); } -static int -stop_threads(WTPERF *wtperf, u_int num, WTPERF_THREAD *threads) +static void +stop_threads(u_int num, WTPERF_THREAD *threads) { u_int i; - int ret; if (num == 0 || threads == NULL) - return (0); + return; for (i = 0; i < num; ++i, ++threads) { - if ((ret = pthread_join(threads->handle, NULL)) != 0) { - lprintf(wtperf, ret, 0, "Error joining thread"); - return (ret); - } + testutil_check(__wt_thread_join(NULL, threads->handle)); free(threads->key_buf); threads->key_buf = NULL; @@ -2815,7 +2775,6 @@ stop_threads(WTPERF *wtperf, u_int num, WTPERF_THREAD *threads) * being read by the monitor thread (among others). 
As a standalone * program, leaking memory isn't a concern, and it's simpler that way. */ - return (0); } static void diff --git a/bench/wtperf/wtperf.h b/bench/wtperf/wtperf.h index bd6c1e829ba..b17d082ddcf 100644 --- a/bench/wtperf/wtperf.h +++ b/bench/wtperf/wtperf.h @@ -232,7 +232,7 @@ struct __wtperf_thread { /* Per-thread structure */ WT_RAND_STATE rnd; /* Random number generation state */ - pthread_t handle; /* Handle */ + wt_thread_t handle; /* Handle */ char *key_buf, *value_buf; /* Key/value memory */ @@ -269,8 +269,8 @@ int run_truncate( int setup_log_file(WTPERF *); void setup_throttle(WTPERF_THREAD *); int setup_truncate(WTPERF *, WTPERF_THREAD *, WT_SESSION *); -int start_idle_table_cycle(WTPERF *, pthread_t *); -int stop_idle_table_cycle(WTPERF *, pthread_t); +void start_idle_table_cycle(WTPERF *, wt_thread_t *); +void stop_idle_table_cycle(WTPERF *, wt_thread_t); void worker_throttle(WTPERF_THREAD *); uint64_t sum_ckpt_ops(WTPERF *); uint64_t sum_insert_ops(WTPERF *); diff --git a/examples/c/ex_thread.c b/examples/c/ex_thread.c index b69b3e9e7e9..ad2ff7f68a0 100644 --- a/examples/c/ex_thread.c +++ b/examples/c/ex_thread.c @@ -34,22 +34,14 @@ #include #include -#ifndef _WIN32 -#include -#else -#include "windows_shim.h" -#endif - -#include +#include "wt_internal.h" static const char *home; -void *scan_thread(void *arg); - #define NUM_THREADS 10 /*! [thread scan] */ -void * +static WT_THREAD_RET scan_thread(void *conn_arg) { WT_CONNECTION *conn; @@ -74,7 +66,7 @@ scan_thread(void *conn_arg) fprintf(stderr, "WT_CURSOR.next: %s\n", session->strerror(session, ret)); - return (NULL); + return (WT_THREAD_RET_VALUE); } /*! 
[thread scan] */ @@ -85,7 +77,7 @@ main(void) WT_CONNECTION *conn; WT_SESSION *session; WT_CURSOR *cursor; - pthread_t threads[NUM_THREADS]; + wt_thread_t threads[NUM_THREADS]; int i, ret; /* @@ -114,10 +106,10 @@ main(void) ret = session->close(session, NULL); for (i = 0; i < NUM_THREADS; i++) - ret = pthread_create(&threads[i], NULL, scan_thread, conn); + ret = __wt_thread_create(NULL, &threads[i], scan_thread, conn); for (i = 0; i < NUM_THREADS; i++) - ret = pthread_join(threads[i], NULL); + ret = __wt_thread_join(NULL, threads[i]); ret = conn->close(conn, NULL); diff --git a/src/include/extern_posix.h b/src/include/extern_posix.h index c0ed056c7b6..b6b5ac51f73 100644 --- a/src/include/extern_posix.h +++ b/src/include/extern_posix.h @@ -25,8 +25,8 @@ extern void __wt_stream_set_line_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((vi extern void __wt_stream_set_no_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_thread_id(char 
*buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_yield(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); diff --git a/src/os_posix/os_thread.c b/src/os_posix/os_thread.c index dfcf297c239..8af672dd0d4 100644 --- a/src/os_posix/os_thread.c +++ b/src/os_posix/os_thread.c @@ -15,6 +15,7 @@ int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) + WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) { WT_DECL_RET; @@ -40,6 +41,7 @@ __wt_thread_create(WT_SESSION_IMPL *session, */ int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) + WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) { WT_DECL_RET; diff --git a/src/support/err.c b/src/support/err.c index 7f6c835ab29..5ec995d8f65 100644 --- a/src/support/err.c +++ b/src/support/err.c @@ -502,8 +502,12 @@ __wt_panic(WT_SESSION_IMPL *session) #if defined(HAVE_DIAGNOSTIC) __wt_abort(session); /* Drop core if testing. */ /* NOTREACHED */ -#else +#endif +#if !defined(HAVE_DIAGNOSTIC) || defined(_WIN32) /* + * Confusing #ifdef structure because gcc knows we can't get here and + * Visual Studio doesn't. + * * Chaos reigns within. * Reflect, repent, and reboot. * Order shall return. @@ -525,12 +529,7 @@ __wt_illegal_value(WT_SESSION_IMPL *session, const char *name) name == NULL ? "" : name, name == NULL ? "" : ": ", "encountered an illegal file format or internal value"); -#if defined(HAVE_DIAGNOSTIC) - __wt_abort(session); /* Drop core if testing. 
*/ - /* NOTREACHED */ -#else return (__wt_panic(session)); -#endif } /* diff --git a/test/checkpoint/checkpointer.c b/test/checkpoint/checkpointer.c index 634a8db9124..3135caa8cad 100644 --- a/test/checkpoint/checkpointer.c +++ b/test/checkpoint/checkpointer.c @@ -28,7 +28,7 @@ #include "test_checkpoint.h" -static void *checkpointer(void *); +static WT_THREAD_RET checkpointer(void *); static int compare_cursors( WT_CURSOR *, const char *, WT_CURSOR *, const char *); static int diagnose_key_error(WT_CURSOR *, int, WT_CURSOR *, int); @@ -39,35 +39,28 @@ static int verify_checkpoint(WT_SESSION *); * start_checkpoints -- * Responsible for creating the checkpoint thread. */ -int +void start_checkpoints(void) { - int ret; - - if ((ret = pthread_create( - &g.checkpoint_thread, NULL, checkpointer, NULL)) != 0) - return (log_print_err("pthread_create", ret, 1)); - return (0); + testutil_check(__wt_thread_create(NULL, + &g.checkpoint_thread, checkpointer, NULL)); } /* * end_checkpoints -- * Responsible for cleanly shutting down the checkpoint thread. */ -int +void end_checkpoints(void) { - void *thread_ret; - - return (pthread_join(g.checkpoint_thread, &thread_ret)); - + testutil_check(__wt_thread_join(NULL, g.checkpoint_thread)); } /* * checkpointer -- * Checkpoint thread start function. 
*/ -static void * +static WT_THREAD_RET checkpointer(void *arg) { char tid[128]; @@ -78,7 +71,7 @@ checkpointer(void *arg) printf("checkpointer thread starting: tid: %s\n", tid); (void)real_checkpointer(); - return (NULL); + return (WT_THREAD_RET_VALUE); } /* diff --git a/test/checkpoint/test_checkpoint.c b/test/checkpoint/test_checkpoint.c index ca13c2bc4ec..cfe5ef1bad4 100644 --- a/test/checkpoint/test_checkpoint.c +++ b/test/checkpoint/test_checkpoint.c @@ -150,20 +150,14 @@ main(int argc, char *argv[]) break; } - if ((ret = start_checkpoints()) != 0) { - (void)log_print_err("Start checkpoints failed", ret, 1); - break; - } + start_checkpoints(); if ((ret = start_workers(ttype)) != 0) { (void)log_print_err("Start workers failed", ret, 1); break; } g.running = 0; - if ((ret = end_checkpoints()) != 0) { - (void)log_print_err("Start workers failed", ret, 1); - break; - } + end_checkpoints(); free(g.cookies); g.cookies = NULL; diff --git a/test/checkpoint/test_checkpoint.h b/test/checkpoint/test_checkpoint.h index 223b580c611..36551211b7e 100644 --- a/test/checkpoint/test_checkpoint.h +++ b/test/checkpoint/test_checkpoint.h @@ -64,12 +64,12 @@ typedef struct { int running; /* Whether to stop */ int status; /* Exit status */ COOKIE *cookies; /* Per-thread info */ - pthread_t checkpoint_thread; /* Checkpoint thread */ + wt_thread_t checkpoint_thread; /* Checkpoint thread */ } GLOBAL; extern GLOBAL g; -int end_checkpoints(void); -int log_print_err(const char *, int, int); -int start_checkpoints(void); -int start_workers(table_type); +void end_checkpoints(void); +int log_print_err(const char *, int, int); +void start_checkpoints(void); +int start_workers(table_type); const char *type_to_string(table_type); diff --git a/test/checkpoint/workers.c b/test/checkpoint/workers.c index 520266adf55..724475926ee 100644 --- a/test/checkpoint/workers.c +++ b/test/checkpoint/workers.c @@ -29,7 +29,7 @@ #include "test_checkpoint.h" static int real_worker(void); -static void 
*worker(void *); +static WT_THREAD_RET worker(void *); /* * create_table -- @@ -64,9 +64,8 @@ start_workers(table_type type) WT_SESSION *session; struct timeval start, stop; double seconds; - pthread_t *tids; + wt_thread_t *tids; int i, ret; - void *thread_ret; ret = 0; @@ -98,17 +97,13 @@ start_workers(table_type type) (void)gettimeofday(&start, NULL); /* Create threads. */ - for (i = 0; i < g.nworkers; ++i) { - if ((ret = pthread_create( - &tids[i], NULL, worker, &g.cookies[i])) != 0) { - (void)log_print_err("pthread_create", ret, 1); - goto err; - } - } + for (i = 0; i < g.nworkers; ++i) + testutil_check(__wt_thread_create( + NULL, &tids[i], worker, &g.cookies[i])); /* Wait for the threads. */ for (i = 0; i < g.nworkers; ++i) - (void)pthread_join(tids[i], &thread_ret); + testutil_check(__wt_thread_join(NULL, tids[i])); (void)gettimeofday(&stop, NULL); seconds = (stop.tv_sec - start.tv_sec) + @@ -146,7 +141,7 @@ worker_op(WT_CURSOR *cursor, uint64_t keyno, u_int new_val) * worker -- * Worker thread start function. 
*/ -static void * +static WT_THREAD_RET worker(void *arg) { char tid[128]; @@ -157,7 +152,7 @@ worker(void *arg) printf("worker thread starting: tid: %s\n", tid); (void)real_worker(); - return (NULL); + return (WT_THREAD_RET_VALUE); } /* diff --git a/test/cursor_order/cursor_order.c b/test/cursor_order/cursor_order.c index 2cbca9baf0e..336ee54db63 100644 --- a/test/cursor_order/cursor_order.c +++ b/test/cursor_order/cursor_order.c @@ -158,8 +158,7 @@ main(int argc, char *argv[]) wt_connect(cfg, config_open); /* WiredTiger connection */ - if (ops_start(cfg)) - return (EXIT_FAILURE); + ops_start(cfg); wt_shutdown(cfg); /* WiredTiger shut down */ } diff --git a/test/cursor_order/cursor_order.h b/test/cursor_order/cursor_order.h index 4f9240f77e8..ab9f94850df 100644 --- a/test/cursor_order/cursor_order.h +++ b/test/cursor_order/cursor_order.h @@ -50,5 +50,5 @@ typedef struct { } SHARED_CONFIG; void load(SHARED_CONFIG *, const char *); -int ops_start(SHARED_CONFIG *); +void ops_start(SHARED_CONFIG *); void verify(SHARED_CONFIG *, const char *); diff --git a/test/cursor_order/cursor_order_ops.c b/test/cursor_order/cursor_order_ops.c index 5c6cfe363b6..cdd5af1a9ef 100644 --- a/test/cursor_order/cursor_order_ops.c +++ b/test/cursor_order/cursor_order_ops.c @@ -28,9 +28,9 @@ #include "cursor_order.h" -static void *append_insert(void *); +static WT_THREAD_RET append_insert(void *); static void print_stats(SHARED_CONFIG *); -static void *reverse_scan(void *); +static WT_THREAD_RET reverse_scan(void *); typedef struct { char *name; /* object name */ @@ -45,15 +45,13 @@ typedef struct { static INFO *run_info; -int +void ops_start(SHARED_CONFIG *cfg) { struct timeval start, stop; double seconds; - pthread_t *tids; + wt_thread_t *tids; uint64_t i, name_index, offset, total_nops; - int ret; - void *thread_ret; tids = NULL; /* Keep GCC 4.1 happy. */ total_nops = 0; @@ -114,18 +112,15 @@ ops_start(SHARED_CONFIG *cfg) /* Create threads. 
*/ for (i = 0; i < cfg->reverse_scanners; ++i) - if ((ret = pthread_create( - &tids[i], NULL, reverse_scan, (void *)(uintptr_t)i)) != 0) - testutil_die(ret, "pthread_create"); - for (; i < cfg->reverse_scanners + cfg->append_inserters; ++i) { - if ((ret = pthread_create( - &tids[i], NULL, append_insert, (void *)(uintptr_t)i)) != 0) - testutil_die(ret, "pthread_create"); - } + testutil_check(__wt_thread_create(NULL, + &tids[i], reverse_scan, (void *)(uintptr_t)i)); + for (; i < cfg->reverse_scanners + cfg->append_inserters; ++i) + testutil_check(__wt_thread_create(NULL, + &tids[i], append_insert, (void *)(uintptr_t)i)); /* Wait for the threads. */ for (i = 0; i < cfg->reverse_scanners + cfg->append_inserters; ++i) - (void)pthread_join(tids[i], &thread_ret); + testutil_check(__wt_thread_join(NULL, tids[i])); (void)gettimeofday(&stop, NULL); seconds = (stop.tv_sec - start.tv_sec) + @@ -154,8 +149,6 @@ ops_start(SHARED_CONFIG *cfg) free(run_info); free(tids); - - return (0); } /* @@ -217,7 +210,7 @@ reverse_scan_op( * reverse_scan -- * Reader thread start function. */ -static void * +static WT_THREAD_RET reverse_scan(void *arg) { INFO *s; @@ -260,7 +253,7 @@ reverse_scan(void *arg) /* Notify all other threads to finish once the first thread is done */ cfg->thread_finish = true; - return (NULL); + return (WT_THREAD_RET_VALUE); } /* @@ -307,7 +300,7 @@ append_insert_op( * append_insert -- * Writer thread start function. 
*/ -static void * +static WT_THREAD_RET append_insert(void *arg) { INFO *s; @@ -347,7 +340,7 @@ append_insert(void *arg) /* Notify all other threads to finish once the first thread is done */ cfg->thread_finish = true; - return (NULL); + return (WT_THREAD_RET_VALUE); } /* diff --git a/test/fops/fops.c b/test/fops/fops.c index 571b7dd59fa..911bfba55ad 100644 --- a/test/fops/fops.c +++ b/test/fops/fops.c @@ -28,7 +28,7 @@ #include "thread.h" -static void *fop(void *); +static WT_THREAD_RET fop(void *); static void print_stats(u_int); typedef struct { @@ -46,15 +46,13 @@ typedef struct { static STATS *run_stats; -int +void fop_start(u_int nthreads) { struct timeval start, stop; double seconds; - pthread_t *tids; + wt_thread_t *tids; u_int i; - int ret; - void *thread_ret; tids = NULL; /* Silence GCC 4.1 warning. */ @@ -66,13 +64,12 @@ fop_start(u_int nthreads) /* Create threads. */ for (i = 0; i < nthreads; ++i) - if ((ret = pthread_create( - &tids[i], NULL, fop, (void *)(uintptr_t)i)) != 0) - testutil_die(ret, "pthread_create"); + testutil_check(__wt_thread_create( + NULL, &tids[i], fop, (void *)(uintptr_t)i)); /* Wait for the threads. */ for (i = 0; i < nthreads; ++i) - (void)pthread_join(tids[i], &thread_ret); + testutil_check(__wt_thread_join(NULL, tids[i])); (void)gettimeofday(&stop, NULL); seconds = (stop.tv_sec - start.tv_sec) + @@ -84,15 +81,13 @@ fop_start(u_int nthreads) free(run_stats); free(tids); - - return (0); } /* * fop -- * File operation function. 
*/ -static void * +static WT_THREAD_RET fop(void *arg) { STATS *s; @@ -150,7 +145,7 @@ fop(void *arg) break; } - return (NULL); + return (WT_THREAD_RET_VALUE); } /* diff --git a/test/fops/t.c b/test/fops/t.c index a481c9ff1c4..2357b170e49 100644 --- a/test/fops/t.c +++ b/test/fops/t.c @@ -129,8 +129,7 @@ main(int argc, char *argv[]) wt_startup(config_open); - if (fop_start(nthreads)) - return (EXIT_FAILURE); + fop_start(nthreads); wt_shutdown(); printf("\n"); diff --git a/test/fops/thread.h b/test/fops/thread.h index 9c1fb0150a6..f6b6bdffd63 100644 --- a/test/fops/thread.h +++ b/test/fops/thread.h @@ -39,7 +39,7 @@ extern const char *config; /* Object config */ extern pthread_rwlock_t single; /* Single-thread */ -int fop_start(u_int); +void fop_start(u_int); void obj_bulk(void); void obj_bulk_unique(int); void obj_checkpoint(void); diff --git a/test/format/backup.c b/test/format/backup.c index ce8b8fed6bd..47f3c54325f 100644 --- a/test/format/backup.c +++ b/test/format/backup.c @@ -83,7 +83,7 @@ copy_file(WT_SESSION *session, const char *name) * backup -- * Periodically do a backup and verify it. */ -void * +WT_THREAD_RET backup(void *arg) { WT_CONNECTION *conn; @@ -100,7 +100,7 @@ backup(void *arg) /* Backups aren't supported for non-standard data sources. */ if (DATASOURCE("helium") || DATASOURCE("kvsbdb")) - return (NULL); + return (WT_THREAD_RET_VALUE); /* Open a session. */ testutil_check(conn->open_session(conn, NULL, NULL, &session)); @@ -188,5 +188,5 @@ backup(void *arg) testutil_check(session->close(session, NULL)); - return (NULL); + return (WT_THREAD_RET_VALUE); } diff --git a/test/format/compact.c b/test/format/compact.c index 00aed4c10f0..f2fa7521946 100644 --- a/test/format/compact.c +++ b/test/format/compact.c @@ -32,7 +32,7 @@ * compaction -- * Periodically do a compaction operation. */ -void * +WT_THREAD_RET compact(void *arg) { WT_CONNECTION *conn; @@ -44,7 +44,7 @@ compact(void *arg) /* Compaction isn't supported for all data sources. 
*/ if (DATASOURCE("helium") || DATASOURCE("kvsbdb")) - return (NULL); + return (WT_THREAD_RET_VALUE); /* Open a session. */ conn = g.wts_conn; @@ -70,5 +70,5 @@ compact(void *arg) testutil_check(session->close(session, NULL)); - return (NULL); + return (WT_THREAD_RET_VALUE); } diff --git a/test/format/format.h b/test/format/format.h index 104ee1553f4..602c1cc6d59 100644 --- a/test/format/format.h +++ b/test/format/format.h @@ -259,7 +259,7 @@ typedef struct { uint64_t deadlock; int id; /* simple thread ID */ - pthread_t tid; /* thread ID */ + wt_thread_t tid; /* thread ID */ int quit; /* thread should quit */ @@ -279,9 +279,9 @@ void bdb_remove(uint64_t, int *); void bdb_update(const void *, size_t, const void *, size_t); #endif -void *alter(void *); -void *backup(void *); -void *compact(void *); +WT_THREAD_RET alter(void *); +WT_THREAD_RET backup(void *); +WT_THREAD_RET compact(void *); void config_clear(void); void config_error(void); void config_file(const char *); @@ -293,7 +293,7 @@ void key_gen(WT_ITEM *, uint64_t); void key_gen_insert(WT_RAND_STATE *, WT_ITEM *, uint64_t); void key_gen_setup(WT_ITEM *); void key_len_setup(void); -void *lrt(void *); +WT_THREAD_RET lrt(void *); void path_setup(const char *); int read_row(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); uint32_t rng(WT_RAND_STATE *); diff --git a/test/format/lrt.c b/test/format/lrt.c index 4af9d66d0e1..b9622cdb635 100644 --- a/test/format/lrt.c +++ b/test/format/lrt.c @@ -32,7 +32,7 @@ * lrt -- * Start a long-running transaction. 
*/ -void * +WT_THREAD_RET lrt(void *arg) { WT_CONNECTION *conn; @@ -182,5 +182,5 @@ lrt(void *arg) free(value.mem); free(buf); - return (NULL); + return (WT_THREAD_RET_VALUE); } diff --git a/test/format/ops.c b/test/format/ops.c index 02cce77eec2..a5e761d53a4 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -36,7 +36,7 @@ static int col_reserve(WT_CURSOR *, uint64_t, bool); static int col_update( TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static int nextprev(WT_CURSOR *, int); -static void *ops(void *); +static WT_THREAD_RET ops(void *); static int row_insert( TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); static int row_modify( @@ -62,7 +62,7 @@ wts_ops(int lastrun) TINFO **tinfo_list, *tinfo, total; WT_CONNECTION *conn; WT_SESSION *session; - pthread_t alter_tid, backup_tid, compact_tid, lrt_tid; + wt_thread_t alter_tid, backup_tid, compact_tid, lrt_tid; int64_t fourths, thread_ops; uint32_t i; int running; @@ -121,7 +121,8 @@ wts_ops(int lastrun) tinfo_list[i] = tinfo = dcalloc(1, sizeof(TINFO)); tinfo->id = (int)i + 1; tinfo->state = TINFO_RUNNING; - testutil_check(pthread_create(&tinfo->tid, NULL, ops, tinfo)); + testutil_check( + __wt_thread_create(NULL, &tinfo->tid, ops, tinfo)); } /* @@ -129,14 +130,16 @@ wts_ops(int lastrun) * long-running reader threads. 
*/ if (g.c_alter) - testutil_check(pthread_create(&alter_tid, NULL, alter, NULL)); + testutil_check( + __wt_thread_create(NULL, &alter_tid, alter, NULL)); if (g.c_backups) - testutil_check(pthread_create(&backup_tid, NULL, backup, NULL)); + testutil_check( + __wt_thread_create(NULL, &backup_tid, backup, NULL)); if (g.c_compact) testutil_check( - pthread_create(&compact_tid, NULL, compact, NULL)); + __wt_thread_create(NULL, &compact_tid, compact, NULL)); if (!SINGLETHREADED && g.c_long_running_txn) - testutil_check(pthread_create(&lrt_tid, NULL, lrt, NULL)); + testutil_check(__wt_thread_create(NULL, &lrt_tid, lrt, NULL)); /* Spin on the threads, calculating the totals. */ for (;;) { @@ -158,7 +161,8 @@ wts_ops(int lastrun) break; case TINFO_COMPLETE: tinfo->state = TINFO_JOINED; - (void)pthread_join(tinfo->tid, NULL); + testutil_check( + __wt_thread_join(NULL, tinfo->tid)); break; case TINFO_JOINED: break; @@ -196,13 +200,13 @@ wts_ops(int lastrun) /* Wait for the backup, compaction, long-running reader threads. */ g.workers_finished = 1; if (g.c_alter) - (void)pthread_join(alter_tid, NULL); + testutil_check(__wt_thread_join(NULL, alter_tid)); if (g.c_backups) - (void)pthread_join(backup_tid, NULL); + testutil_check(__wt_thread_join(NULL, backup_tid)); if (g.c_compact) - (void)pthread_join(compact_tid, NULL); + testutil_check(__wt_thread_join(NULL, compact_tid)); if (!SINGLETHREADED && g.c_long_running_txn) - (void)pthread_join(lrt_tid, NULL); + testutil_check(__wt_thread_join(NULL, lrt_tid)); g.workers_finished = 0; if (g.logging != 0) { @@ -404,7 +408,7 @@ snap_check(WT_CURSOR *cursor, * ops -- * Per-thread operations. 
*/ -static void * +static WT_THREAD_RET ops(void *arg) { enum { INSERT, MODIFY, READ, REMOVE, UPDATE } op; @@ -864,7 +868,7 @@ deadlock: ++tinfo->deadlock; free(value->mem); tinfo->state = TINFO_COMPLETE; - return (NULL); + return (WT_THREAD_RET_VALUE); } /* diff --git a/test/format/util.c b/test/format/util.c index 06e3f37b830..f09bb160893 100644 --- a/test/format/util.c +++ b/test/format/util.c @@ -472,7 +472,7 @@ fclose_and_clear(FILE **fpp) * alter -- * Periodically alter a table's metadata. */ -void * +WT_THREAD_RET alter(void *arg) { WT_CONNECTION *conn; @@ -510,5 +510,5 @@ alter(void *arg) } testutil_check(session->close(session, NULL)); - return (NULL); + return (WT_THREAD_RET_VALUE); } diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c index 12f86d664ef..7e76f61bd12 100644 --- a/test/recovery/random-abort.c +++ b/test/recovery/random-abort.c @@ -69,7 +69,7 @@ typedef struct { uint32_t id; } WT_THREAD_DATA; -static void * +static WT_THREAD_RET thread_run(void *arg) { FILE *fp; @@ -161,15 +161,15 @@ static void fill_db(uint32_t) static void fill_db(uint32_t nth) { - pthread_t *thr; WT_CONNECTION *conn; WT_SESSION *session; WT_THREAD_DATA *td; + wt_thread_t *thr; uint32_t i; int ret; const char *envconf; - thr = dcalloc(nth, sizeof(pthread_t)); + thr = dcalloc(nth, sizeof(*thr)); td = dcalloc(nth, sizeof(WT_THREAD_DATA)); if (chdir(home) != 0) testutil_die(errno, "Child chdir: %s", home); @@ -192,9 +192,8 @@ fill_db(uint32_t nth) td[i].conn = conn; td[i].start = (UINT64_MAX / nth) * i; td[i].id = i; - if ((ret = pthread_create( - &thr[i], NULL, thread_run, &td[i])) != 0) - testutil_die(ret, "pthread_create"); + testutil_check(__wt_thread_create( + NULL, &thr[i], thread_run, &td[i])); } printf("Spawned %" PRIu32 " writer threads\n", nth); fflush(stdout); @@ -203,7 +202,7 @@ fill_db(uint32_t nth) * it is killed. 
*/ for (i = 0; i < nth; ++i) - testutil_assert(pthread_join(thr[i], NULL) == 0); + testutil_check(__wt_thread_join(NULL, thr[i])); /* * NOTREACHED */ diff --git a/test/thread/rw.c b/test/thread/rw.c index cbbd806c559..3283f780b32 100644 --- a/test/thread/rw.c +++ b/test/thread/rw.c @@ -29,8 +29,8 @@ #include "thread.h" static void print_stats(u_int); -static void *reader(void *); -static void *writer(void *); +static WT_THREAD_RET reader(void *); +static WT_THREAD_RET writer(void *); typedef struct { char *name; /* object name */ @@ -45,15 +45,13 @@ typedef struct { static INFO *run_info; -int +void rw_start(u_int readers, u_int writers) { struct timeval start, stop; + wt_thread_t *tids; double seconds; - pthread_t *tids; u_int i, name_index, offset, total_nops; - int ret; - void *thread_ret; tids = NULL; /* Keep GCC 4.1 happy. */ total_nops = 0; @@ -109,18 +107,15 @@ rw_start(u_int readers, u_int writers) /* Create threads. */ for (i = 0; i < readers; ++i) - if ((ret = pthread_create( - &tids[i], NULL, reader, (void *)(uintptr_t)i)) != 0) - testutil_die(ret, "pthread_create"); - for (; i < readers + writers; ++i) { - if ((ret = pthread_create( - &tids[i], NULL, writer, (void *)(uintptr_t)i)) != 0) - testutil_die(ret, "pthread_create"); - } + testutil_check(__wt_thread_create( + NULL, &tids[i], reader, (void *)(uintptr_t)i)); + for (; i < readers + writers; ++i) + testutil_check(__wt_thread_create( + NULL, &tids[i], writer, (void *)(uintptr_t)i)); /* Wait for the threads. */ for (i = 0; i < readers + writers; ++i) - (void)pthread_join(tids[i], &thread_ret); + testutil_check(__wt_thread_join(NULL, tids[i])); (void)gettimeofday(&stop, NULL); seconds = (stop.tv_sec - start.tv_sec) + @@ -147,8 +142,6 @@ rw_start(u_int readers, u_int writers) free(run_info); free(tids); - - return (0); } /* @@ -186,7 +179,7 @@ reader_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) * reader -- * Reader thread start function. 
*/ -static void * +static WT_THREAD_RET reader(void *arg) { INFO *s; @@ -234,7 +227,7 @@ reader(void *arg) printf(" read thread %2d stopping: tid: %s, file: %s\n", id, tid, s->name); - return (NULL); + return (WT_THREAD_RET_VALUE); } /* @@ -291,7 +284,7 @@ writer_op(WT_SESSION *session, WT_CURSOR *cursor, INFO *s) * writer -- * Writer thread start function. */ -static void * +static WT_THREAD_RET writer(void *arg) { INFO *s; @@ -339,7 +332,7 @@ writer(void *arg) printf("write thread %2d stopping: tid: %s, file: %s\n", id, tid, s->name); - return (NULL); + return (WT_THREAD_RET_VALUE); } /* diff --git a/test/thread/t.c b/test/thread/t.c index 4b767e7f476..c6ff9a95145 100644 --- a/test/thread/t.c +++ b/test/thread/t.c @@ -160,8 +160,7 @@ main(int argc, char *argv[]) wt_connect(config_open); /* WiredTiger connection */ - if (rw_start(readers, writers)) /* Loop operations */ - return (EXIT_FAILURE); + rw_start(readers, writers); /* Loop operations */ stats(); /* Statistics */ diff --git a/test/thread/thread.h b/test/thread/thread.h index 86b1b55a30e..bcba442b4c1 100644 --- a/test/thread/thread.h +++ b/test/thread/thread.h @@ -46,6 +46,6 @@ extern int vary_nops; /* Operations per thread */ extern int session_per_op; /* New session per operation */ void load(const char *); -int rw_start(u_int, u_int); +void rw_start(u_int, u_int); void stats(void); void verify(const char *); diff --git a/test/windows/windows_shim.c b/test/windows/windows_shim.c index 33980260dc6..8986c1a5ae1 100644 --- a/test/windows/windows_shim.c +++ b/test/windows/windows_shim.c @@ -124,26 +124,3 @@ pthread_rwlock_wrlock(pthread_rwlock_t *rwlock) return (0); } - -#pragma warning( once : 4024 ) -#pragma warning( once : 4047 ) -int -pthread_create(pthread_t *tidret, const pthread_attr_t *ignored, - void *(*func)(void *), void * arg) -{ - ignored = ignored; - *tidret = CreateThread(NULL, 0, func, arg, 0, NULL); - - if (*tidret != NULL) - return (0); - - return (1); -} - -int -pthread_join(pthread_t 
thread, void **ignored) -{ - ignored = ignored; - WaitForSingleObject(thread, INFINITE); - return (0); -} -- cgit v1.2.1 From 3fcbbb370ba09e6a95bd303d1984c48446efd2c2 Mon Sep 17 00:00:00 2001 From: David Hows Date: Thu, 8 Jun 2017 12:02:21 +1000 Subject: WT-2596 Document behavior after a crash with a backup cursor open (#3455) --- src/docs/backup.dox | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/docs/backup.dox b/src/docs/backup.dox index 45edc85d6a5..91b15da9275 100644 --- a/src/docs/backup.dox +++ b/src/docs/backup.dox @@ -59,6 +59,11 @@ During the period the backup cursor is open, database checkpoints can be created, but no checkpoints can be deleted. This may result in significant file growth. +Additionally, if a crash occurs during the period the backup cursor is open and +logging is disabled, then the system will be restored to the most recent +checkpoint prior to the opening of the backup cursor, even if later database +checkpoints were created. + The following is a programmatic example of creating a backup: @snippet ex_all.c backup -- cgit v1.2.1 From 03e6b4f73c2f06aeb57b04bf1063986b2c8ad4d0 Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Thu, 8 Jun 2017 11:26:34 -0400 Subject: WT-3326 Enhance workload generator to support wtperf config files (#3433) Added wtperf.py script to run wtperf files. Added some supporting functions in the runner module to be used by programs created by this tool.
--- bench/workgen/runner/runner/__init__.py | 2 +- bench/workgen/runner/runner/core.py | 104 +++++++- bench/workgen/wtperf.py | 440 ++++++++++++++++++++++++++++++++ 3 files changed, 543 insertions(+), 3 deletions(-) create mode 100644 bench/workgen/wtperf.py diff --git a/bench/workgen/runner/runner/__init__.py b/bench/workgen/runner/runner/__init__.py index 67b547bc51b..ed21fffe8dc 100644 --- a/bench/workgen/runner/runner/__init__.py +++ b/bench/workgen/runner/runner/__init__.py @@ -88,5 +88,5 @@ except: shutil.rmtree('WT_TEST', True) os.mkdir('WT_TEST') -from .core import txn, extensions_config +from .core import txn, extensions_config, op_group_transaction, op_log_like, op_multi_table from .latency import workload_latency diff --git a/bench/workgen/runner/runner/core.py b/bench/workgen/runner/runner/core.py index a0f0d4d77cd..2c8311c4ca7 100644 --- a/bench/workgen/runner/runner/core.py +++ b/bench/workgen/runner/runner/core.py @@ -29,12 +29,12 @@ # runner/core.py # Core functions available to all runners import glob, os -import workgen +from workgen import Key, Operation, OpList, Table, Transaction, Value # txn -- # Put the operation (and any suboperations) within a transaction. def txn(op, config=None): - t = workgen.Transaction(config) + t = Transaction(config) op._transaction = t return op @@ -99,3 +99,103 @@ def extensions_config(exts): if len(extfiles) != 0: result = ',extensions=[' + ','.join(extfiles.values()) + ']' return result + +def _op_multi_table_as_list(ops_arg, tables): + result = [] + if ops_arg._optype != Operation.OP_NONE: + for table in tables: + result.append(Operation(ops_arg._optype, table, ops_arg._key, ops_arg._value)) + else: + for op in ops._group: + result.extend(_op_multi_table_as_list(op, tables)) + return result + +# A convenient way to build a list of operations +def op_append(op1, op2): + if op1 == None: + op1 = op2 + else: + op1 += op2 + return op1 + +# Emulate wtperf's table_count option. 
Spread the given operations over +# a set of tables. +def op_multi_table(ops_arg, tables): + ops = None + for op in _op_multi_table_as_list(ops_arg, tables): + ops = op_append(ops, op) + return ops + +# should be 8 bytes format 'Q' +_logkey = Key(Key.KEYGEN_APPEND, 8) +def _op_log_op(op, log_table): + keysize = op._key._size + if keysize == 0: + keysize = op._table.options.key_size + valuesize = op._value._size + if valuesize == 0: + valuesize = op._table.options.value_size + v = Value(keysize + valuesize) + return Operation(Operation.OP_INSERT, log_table, _logkey, v) + +def _optype_is_write(optype): + return optype == Operation.OP_INSERT or optype == Operation.OP_UPDATE or \ + optype == Operation.OP_REMOVE + +# Emulate wtperf's log_like option. For all operations, add a second +# insert operation going to a log table. +def op_log_like(op, log_table, ops_per_txn): + if op._optype != Operation.OP_NONE: + if _optype_is_write(op._optype): + op += _op_log_op(op, log_table) + if ops_per_txn == 0: + op = txn(op) # txn for each action. + else: + oplist = [] + for op2 in op._group: + if op2._optype == Operation.OP_NONE: + oplist.append(op_log_like(op2, log_table)) + elif ops_per_txn == 0 and _optype_is_write(op2._optype): + op2 += _op_log_op(op2, log_table) + oplist.append(txn(op2)) # txn for each action. + else: + oplist.append(op2) + if _optype_is_write(op2._optype): + oplist.append(_op_log_op(op2, log_table)) + op._group = OpList(oplist) + return op + +def _op_transaction_list(oplist, txn_config): + result = None + for op in oplist: + result = op_append(result, op) + return txn(result, txn_config) + +# Emulate wtperf's ops_per_txn option. Create transactions around +# groups of operations of the indicated size. 
+def op_group_transaction(ops_arg, ops_per_txn, txn_config): + if ops_arg != Operation.OP_NONE: + return txn(ops_arg, txn_config) + if ops_arg._transaction != None: + raise Exception('nested transactions not supported') + if ops_arg._repeatgroup != None: + raise Exception('grouping transactions with multipliers not supported') + + oplist = [] + ops = None + nops = 0 + txgroup = [] + for op in ops_arg._group: + if op.optype == Operation.OP_NONE: + oplist.append(_op_transaction_list(txgroup, txn_config)) + txgroup = [] + oplist.append(op) + else: + txgroup.append(op) + if len(txgroup) >= ops_per_txn: + oplist.append(_op_transaction_list(txgroup, txn_config)) + txgroup = [] + if len(txgroup) > 0: + oplist.append(_op_transaction_list(txgroup, txn_config)) + ops_arg._group = OpList(oplist) + return ops_arg diff --git a/bench/workgen/wtperf.py b/bench/workgen/wtperf.py new file mode 100644 index 00000000000..3a196fe7b57 --- /dev/null +++ b/bench/workgen/wtperf.py @@ -0,0 +1,440 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2017 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +# wtperf.py +# A partial emulation of wtperf. Translates a .wtperf file into a Python +# script that uses the workgen module, and runs the script. Errors are +# issued for any .wtperf directives that are not known. +# See also the usage() function. +# +from __future__ import print_function +import os, sys, tempfile + +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + +class OptionValue: + def __init__(self, value, filename, linenum): + self.value = value + self.filename = filename + self.linenum = linenum + +class TranslateException(Exception): + pass + +class Options(object): + pass + +class Translator: + def __init__(self, filename, prefix, verbose): + self.filename = filename + self.prefix = prefix + self.verbose = verbose + self.linenum = 0 + self.opts = {} + self.used_opts = {} + self.has_error = False + + def error_file_line(self, fname, linenum, msg): + self.has_error = True + eprint(fname + ':' + str(linenum) + ': error: ' + msg) + + # Report an error and continue + def error(self, msg): + self.error_file_line(self.filename, self.linenum, msg) + + # Report an error and unwind the stack + def fatal_error(self, msg, errtype): + self.error(msg) + raise TranslateException(errtype) + + supported_opt_list = [ 'compression', 'conn_config', 'icount', + 'key_sz', 'log_like_table', + 'populate_ops_per_txn', 'populate_threads', + 'reopen_connection', + 'table_config', 'table_count', + 'threads', 'transaction_config', 'value_sz' ] + + def set_opt(self, optname, val): + if optname not 
in self.supported_opt_list: + self.error("unknown option: " + optname) + return + elif val[0] == '"' and val[-1] == '"': + v = val[1:-1] + elif val == 'true': + v = True + elif val == 'false': + v = False + elif val[0] == '(': + v = val # config string stored as is + else: + try: + v = int(val) # it might be an integer + except ValueError: + v = val # it's a string after all + self.opts[optname] = OptionValue(v, self.filename, self.linenum) + + def get_opt(self, optname, dfault): + if optname in self.opts: + ret = self.opts[optname] + self.filename = ret.filename + self.linenum = ret.linenum + self.used_opts[optname] = 1 + return ret.value + else: + return dfault + + def get_int_opt(self, optname, dfault): + return self.get_opt(optname, dfault) + 0 + + def get_boolean_opt(self, optname, dfault): + return not not self.get_opt(optname, dfault) + + # Split a string 'left_side=right_side' into two parts + def split_assign(self, s): + equalpos = s.find('=') + if equalpos < 0: + self.error("missing '=' for line: " + line) + return (None, None) + else: + return s.split('=', 1) + + # Split a config string honoring nesting e.g. + # "(abc=123,def=234,ghi=(hi=1,bye=2))" would return 3 items. 
+ def split_config_parens(self, s): + if s[0:1] != '(': + import pdb + pdb.set_trace() + self.fatal_error('missing left paren', 'config parse error') + if s[-1:] != ')': + self.fatal_error('missing right paren', 'config parse error') + s = s[1:-1] + result = [] + level = 0 + cur = '' + for ch in s: + if ch == ',' and level == 0: + result.append(cur) + cur = '' + else: + cur += ch + if ch == '(': + level += 1 + elif ch == ')': + level -= 1 + if level < 0: + self.fatal_error('unbalanced paren', 'config parse error') + if level != 0: + self.fatal_error('unbalanced paren', 'config parse error') + if len(cur) != 0: + result.append(cur) + return result + + def assign_str(self, left, right): + return left + '=' + str(right) + '\n' + + def add_operation_str(self, count, opname, multi): + result = '' + tablename = 'tables[0]' if multi else 'table' + if count > 1: + result += str(count) + ' * ' + if count > 0: + result += 'Operation(Operation.' + opname + ', ' + \ + tablename + ') + \\\n' + result += ' ' + return result + + # Wtperf's throttle is based on the number of regular operations, + # not including log_like operations. Workgen counts all operations, + # it doesn't treat log operations any differently. Adjust the throttle + # number to account for the difference. 
+ def calc_throttle(self, thread_opts, log_like_table): + throttle = thread_opts.throttle + if not log_like_table: + return (throttle, '') + modify = thread_opts.inserts + thread_opts.updates + regular = modify + thread_opts.reads + total = regular + modify + factor = (total + 0.0) / regular + new_throttle = int(throttle * factor) + if new_throttle == throttle: + comment = '' + else: + comment = '# wtperf throttle=' + str(throttle) + ' adjusted by ' + \ + str(factor) + ' to compensate for log_like operations.\n' + return (new_throttle, comment) + + def parse_threads(self, threads_config): + tdecls = '' + tlist = self.split_config_parens(threads_config) + table_count = self.get_int_opt('table_count', 1) + log_like_table = self.get_boolean_opt('log_like_table', False) + txn_config = self.get_opt('transaction_config', '') + if log_like_table: + tdecls += 'log_name = "table:log"\n' + tdecls += 's.create(log_name, "key_format=S,value_format=S," +' + \ + ' compress_table_config)\n' + tdecls += 'log_table = Table(log_name)\n\n' + thread_count = 0 + tnames = '' + multi = (table_count > 1) + for t in tlist: + thread_name = 'thread' + str(thread_count) + thread_count += 1 + + # For wtperf compatibility, we allow both 'insert/inserts' etc. 
+ topts = Options() + topts.count = 1 + topts.insert = 0 + topts.inserts = 0 + topts.ops_per_txn = 0 + topts.read = 0 + topts.reads = 0 + topts.throttle = 0 + topts.update = 0 + topts.updates = 0 + + for o in self.split_config_parens(t): + (k, v) = self.split_assign(o) + if hasattr(topts, k): + try: + setattr(topts, k, int(v)) + except ValueError: + self.error('thread option ' + k + ': integer expected') + else: + self.error('unknown thread option: ' + k) + + topts.inserts += topts.insert; topts.insert = 0 + topts.updates += topts.update; topts.update = 0 + topts.reads += topts.read; topts.read = 0 + if topts.count == 0: + continue + + if topts.inserts + topts.reads + topts.updates == 0: + self.fatal_error('need read/insert/update/...', + 'thread config error') + tdecls += 'ops = ' + tdecls += self.add_operation_str(topts.inserts, 'OP_INSERT', multi) + tdecls += self.add_operation_str(topts.reads, 'OP_SEARCH', multi) + tdecls += self.add_operation_str(topts.updates, 'OP_UPDATE', multi) + tdecls = tdecls.rstrip(' \n\\+') + '\n' + if multi: + tdecls += 'ops = op_multi_table(ops, tables)\n' + if topts.ops_per_txn > 0: + tdecls += 'ops = op_group_transaction(ops, ' + \ + str(topts.ops_per_txn) + ', "' + txn_config + '")\n' + if log_like_table: + tdecls += 'ops = op_log_like(ops, log_table, ' + \ + str(topts.ops_per_txn) + ')\n' + tdecls += thread_name + ' = Thread(ops)\n' + if topts.throttle > 0: + (throttle, comment) = self.calc_throttle(topts, log_like_table) + tdecls += comment + tdecls += self.assign_str(thread_name + '.options.throttle', + throttle) + tdecls += '\n' + if topts.count > 1: + tnames += str(topts.count) + ' * ' + tnames += thread_name + ' + ' + + tnames = tnames.rstrip(' +') + return (tdecls, tnames) + + def translate(self): + try: + return self.translate_inner() + except TranslateException: + # An error has already been reported + return None + + def translate_inner(self): + workloadopts = '' + with open(self.filename) as fin: + for line in fin: + 
self.linenum += 1 + commentpos = line.find('#') + if commentpos >= 0: + line = line[0:commentpos] + line = line.strip() + if len(line) == 0: + continue + (key, val) = self.split_assign(line) + if key in [ 'max_latency', 'report_file', 'report_interval', + 'run_time', 'sample_interval', 'sample_rate' ]: + workloadopts += 'workload.options.' + key + '=' + val + '\n' + else: + self.set_opt(key, val) + + table_count = self.get_int_opt('table_count', 1) + conn_config = self.get_opt('conn_config', '') + table_config = self.get_opt('table_config', '') + key_sz = self.get_int_opt('key_sz', 20) + value_sz = self.get_int_opt('value_sz', 100) + reopen = self.get_boolean_opt('reopen_connection', False) + compression = self.get_opt('compression', '') + txn_config = self.get_opt('transaction_config', '') + + s = '#/usr/bin/env python\n' + s += '# generated from ' + self.filename + '\n' + s += self.prefix + s += 'from runner import *\n' + s += 'from wiredtiger import *\n' + s += 'from workgen import *\n' + s += '\n' + s += 'context = Context()\n' + s += 'conn_config = "' + conn_config + '"\n' + if compression != '': + s += 'conn_config += extensions_config(["compressors/' + \ + compression + '"])\n' + compression = 'block_compressor=' + compression + ',' + s += 'conn = wiredtiger_open("WT_TEST", "create," + conn_config)\n' + s += 's = conn.open_session()\n' + s += '\n' + s += 'wtperf_table_config = "key_format=S,value_format=S,type=lsm," +\\\n' + s += ' "exclusive=true,allocation_size=4kb," +\\\n' + s += ' "internal_page_max=64kb,leaf_page_max=4kb,split_pct=100,"\n' + s += 'compress_table_config = "' + compression + '"\n' + s += 'table_config = "' + table_config + '"\n' + if table_count == 1: + s += 'tname = "file:test.wt"\n' + s += 's.create(tname, wtperf_table_config +\\\n' + s += ' compress_table_config + table_config)\n' + s += 'table = Table(tname)\n' + s += 'table.options.key_size = ' + str(key_sz) + '\n' + s += 'table.options.value_size = ' + str(value_sz) + '\n' + else: + 
s += 'table_count = ' + str(table_count) + '\n' + s += 'tables = []\n' + s += 'for i in range(0, table_count):\n' + s += ' tname = "file:test" + str(i) + ".wt"\n' + s += ' s.create(tname, ' + \ + 'wtperf_table_config + ' + \ + 'compress_table_config + table_config)\n' + s += ' t = Table(tname)\n' + s += ' t.options.key_size = ' + str(key_sz) + '\n' + s += ' t.options.value_size = ' + str(value_sz) + '\n' + s += ' tables.append(t)\n' + s += '\n' + + icount = self.get_int_opt('icount', 0) + pop_thread = self.get_int_opt('populate_threads', 1) + pop_per_txn = self.get_int_opt('populate_ops_per_txn', 0) + if icount != 0: + if pop_thread == 0: + self.fatal_error('icount != 0 and populate_threads == 0: ' +\ + 'cannot populate entries with no threads') + elif pop_thread == 1: + mult = '' + else: + mult = str(pop_thread) + ' * ' + + # if there are multiple tables to be filled during populate, + # the icount is split between them all. + nops_per_thread = icount / (pop_thread * table_count) + if table_count == 1: + s += 'pop_ops = Operation(Operation.OP_INSERT, table)\n' + else: + s += 'pop_ops = Operation(Operation.OP_INSERT, tables[0])\n' + s += 'pop_ops = op_multi_table(pop_ops, tables)\n' + if pop_per_txn > 0: + s += 'pop_ops = op_group_transaction(pop_ops, ' + \ + str(pop_per_txn) + ', "' + txn_config + '")\n' + s += 'pop_thread = Thread(pop_ops * ' + str(nops_per_thread) + ')\n' + s += 'pop_workload = Workload(context, ' + mult + 'pop_thread)\n' + if self.verbose > 0: + s += 'print("populate:")\n' + s += 'pop_workload.run(conn)\n' + else: + if self.get_int_opt('populate_threads', 0) != 0: + self.error("populate_threads > 0, icount == 0") + + thread_config = self.get_opt('threads', '') + if thread_config != '': + (t_create, t_var) = self.parse_threads(thread_config) + s += '\n' + t_create + if reopen: + s += '\n# reopen the connection\n' + s += 'conn.close()\n' + s += 'conn = wiredtiger_open(' + \ + '"WT_TEST", "create," + conn_config)\n' + s += '\n' + s += 'workload = 
Workload(context, ' + t_var + ')\n' + s += workloadopts + if self.verbose > 0: + s += 'print("workload:")\n' + s += 'workload.run(conn)\n' + + for o in self.used_opts: + del self.opts[o] + if len(self.opts) != 0: + self.error('internal error, options not handled: ' + str(self.opts)) + return s + +def usage(): + eprint(( + 'Usage: python wtperf.py [ options ] file.wtperf ...\n' + '\n' + 'Options:\n' + ' --python Python output generated on stdout\n' + ' -v --verbose Verbose output\n' + '\n' + 'If --python is not specified, the resulting workload is run.')) + +verbose = 0 +py_out = False +workgen_dir = os.path.dirname(os.path.abspath(__file__)) +runner_dir = os.path.join(workgen_dir, 'runner') +prefix = ( + '# The next lines are unneeded if this script is in the runner directory.\n' + 'import sys\n' + 'sys.path.append("' + runner_dir + '")\n\n') + +exit_status = 0 +for arg in sys.argv[1:]: + if arg == '--python': + py_out = True + elif arg == '--verbose' or arg == '-v': + verbose += 1 + elif arg.endswith('.wtperf'): + translator = Translator(arg, prefix, verbose) + pysrc = translator.translate() + if translator.has_error: + exit_status = 1 + elif py_out: + print(pysrc) + else: + (outfd, tmpfile) = tempfile.mkstemp(suffix='.py') + os.write(outfd, pysrc) + os.close(outfd) + execfile(tmpfile) + os.remove(tmpfile) + else: + usage() + sys.exit(1) +sys.exit(exit_status) -- cgit v1.2.1 From 9c44dde3bae6df82417ff30ac4ce73aa3beeeb02 Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Thu, 8 Jun 2017 11:29:26 -0400 Subject: WT-3346 create JSON output for latency sampling. (#3450) Added a script to merge workgen and WT outputs. Make workgen_stat.sh script agnostic as to what analyze tool to run. 
--- bench/workgen/runner/workgen_stat.sh | 75 ++++++++++++++++++++++++++++++++++++ bench/workgen/workgen.cxx | 54 ++++++++++++++++++++++++-- bench/workgen/workgen.h | 1 + bench/workgen/workgen_func.c | 13 +++++++ bench/workgen/workgen_func.h | 2 + bench/workgen/workgen_int.h | 1 + 6 files changed, 142 insertions(+), 4 deletions(-) create mode 100755 bench/workgen/runner/workgen_stat.sh diff --git a/bench/workgen/runner/workgen_stat.sh b/bench/workgen/runner/workgen_stat.sh new file mode 100755 index 00000000000..1739c29859e --- /dev/null +++ b/bench/workgen/runner/workgen_stat.sh @@ -0,0 +1,75 @@ +#!/bin/bash +# +# workgen_stat.sh - combine JSON time series output from WT and workgen. +# +Usage() { + cat < # set the WiredTiger home directory + -e # run analyzer on the combined files + -o # output file for result + +At least one of '-t2' or '-o' must be selected. +EOF + exit 1 +} + +Filter() { + sed -e 's/"version" *: *"[^"]*",//' "$@" +} + +wthome=. +outfile= +analyze= + +while [ "$#" != 0 ]; do + arg="$1" + shift + case "$arg" in + -h ) + if [ $# = 0 ]; then + Usage + fi + wthome="$1" + shift + ;; + -o ) + if [ $# = 0 ]; then + Usage + fi + outfile="$1" + shift + ;; + -e ) + if [ $# = 0 ]; then + Usage + fi + analyze="$1" + shift + ;; + esac +done +if [ ! -d "$wthome" ]; then + echo "$wthome: WT home directory does not exist" + exit 1 +fi +if [ ! 
-f "$wthome/WiredTiger.wt" ]; then + echo "$wthome: directory is not a WiredTiger home directory" + exit 1 +fi +if [ "$outfile" = '' ]; then + if [ "$analyze" = false ]; then + Usage + fi + outfile="$wthome/stat_tmp.json" +fi +(cd $wthome; Filter WiredTigerStat.* sample.json) | sort > $outfile +if [ "$analyze" != '' ]; then + sysname=`uname -s` + if [ "$sysname" = Darwin ]; then + open -a "$analyze" "$outfile" + else + "$analyze" "$outfile" + fi +fi diff --git a/bench/workgen/workgen.cxx b/bench/workgen/workgen.cxx index c56acfd2989..880b8ca6467 100644 --- a/bench/workgen/workgen.cxx +++ b/bench/workgen/workgen.cxx @@ -267,16 +267,18 @@ int ContextInternal::create_all() { } Monitor::Monitor(WorkloadRunner &wrunner) : - _errno(0), _exception(), _wrunner(wrunner), _stop(false), _handle() {} + _errno(0), _exception(), _wrunner(wrunner), _stop(false), _handle(), + _out(NULL), _json(NULL) {} Monitor::~Monitor() {} int Monitor::run() { struct timespec t; struct tm *tm, _tm; - char time_buf[64]; + char time_buf[64], version[100]; Stats prev_totals; WorkloadOptions *options = &_wrunner._workload->options; uint64_t latency_max = (uint64_t)options->max_latency; + bool first; (*_out) << "#time," << "totalsec," @@ -295,6 +297,8 @@ int Monitor::run() { << "update maximum latency(uS)" << std::endl; + first = true; + workgen_version(version, sizeof(version)); Stats prev_interval; while (!_stop) { for (int i = 0; i < options->sample_interval && !_stop; i++) @@ -337,6 +341,32 @@ int Monitor::run() { << "," << interval.update.max_latency << std::endl; + if (_json != NULL) { +#define WORKGEN_TIMESTAMP_JSON "%Y-%m-%dT%H:%M:%S.000Z" + (void)strftime(time_buf, sizeof(time_buf), + WORKGEN_TIMESTAMP_JSON, tm); + +#define TRACK_JSON(name, t) \ + "\"" << (name) << "\":{" \ + << "\"ops per sec\":" << ((t).ops / interval_secs) \ + << ",\"average latency\":" << (t).average_latency() \ + << ",\"min latency\":" << (t).min_latency \ + << ",\"max latency\":" << (t).max_latency \ + << "}" + + 
(*_json) << "{"; + if (first) { + (*_json) << "\"version\":\"" << version << "\","; + first = false; + } + (*_json) << "\"localTime\":\"" << time_buf + << "\",\"workgen\":{" + << TRACK_JSON("read", interval.read) << "," + << TRACK_JSON("insert", interval.insert) << "," + << TRACK_JSON("update", interval.update) + << "}}" << std::endl; + } + uint64_t read_max = interval.read.max_latency; uint64_t insert_max = interval.read.max_latency; uint64_t update_max = interval.read.max_latency; @@ -1315,8 +1345,8 @@ TableInternal::TableInternal(const TableInternal &other) : _tint(other._tint), TableInternal::~TableInternal() {} WorkloadOptions::WorkloadOptions() : max_latency(0), - report_file("workload.stat"), report_interval(0), - run_time(0), sample_interval(0), sample_rate(1), + report_file("workload.stat"), report_interval(0), run_time(0), + sample_file("sample.json"), sample_interval(0), sample_rate(1), _options() { _options.add_int("max_latency", max_latency, "prints warning if any latency measured exceeds this number of " @@ -1329,6 +1359,11 @@ WorkloadOptions::WorkloadOptions() : max_latency(0), "The file name is relative to the connection's home directory. " "When set to the empty string, stdout is used."); _options.add_int("run_time", run_time, "total workload seconds"); + _options.add_string("sample_file", sample_file, + "file name for collecting latency output in a JSON-like format, " + "enabled by the report_interval option. " + "The file name is relative to the connection's home directory. 
" + "When set to the empty string, no JSON is emitted."); _options.add_int("sample_interval", sample_interval, "performance logging every interval seconds, 0 to disable"); _options.add_int("sample_rate", sample_rate, @@ -1492,6 +1527,7 @@ int WorkloadRunner::run_all() { WorkloadOptions *options = &_workload->options; Monitor monitor(*this); std::ofstream monitor_out; + std::ofstream monitor_json; std::ostream &out = *_report_out; WT_DECL_RET; @@ -1510,6 +1546,12 @@ int WorkloadRunner::run_all() { open_report_file(monitor_out, "monitor", "monitor output file"); monitor._out = &monitor_out; + if (!options->sample_file.empty()) { + open_report_file(monitor_json, options->sample_file.c_str(), + "sample JSON output file"); + monitor._json = &monitor_json; + } + if ((ret = pthread_create(&monitor._handle, NULL, monitor_main, &monitor)) != 0) { std::cerr << "monitor thread failed err=" << ret << std::endl; @@ -1588,6 +1630,10 @@ int WorkloadRunner::run_all() { << std::endl; if (exception == NULL && !monitor._exception._str.empty()) exception = &monitor._exception; + + monitor_out.close(); + if (!options->sample_file.empty()) + monitor_json.close(); } // issue the final report diff --git a/bench/workgen/workgen.h b/bench/workgen/workgen.h index c1ae01ed5a4..c7be8ee0035 100644 --- a/bench/workgen/workgen.h +++ b/bench/workgen/workgen.h @@ -358,6 +358,7 @@ struct WorkloadOptions { int run_time; int sample_interval; int sample_rate; + std::string sample_file; WorkloadOptions(); WorkloadOptions(const WorkloadOptions &other); diff --git a/bench/workgen/workgen_func.c b/bench/workgen/workgen_func.c index 2e1271a515e..5ce2146a8e4 100644 --- a/bench/workgen/workgen_func.c +++ b/bench/workgen/workgen_func.c @@ -87,3 +87,16 @@ workgen_u64_to_string_zf(uint64_t n, char *buf, size_t len) { u64_to_string_zf(n, buf, len); } + +#define WORKGEN_VERSION_PREFIX "workgen-" +extern void +workgen_version(char *buf, size_t len) +{ + size_t prefix_len; + + prefix_len = 
strlen(WORKGEN_VERSION_PREFIX); + (void)strncpy(buf, WORKGEN_VERSION_PREFIX, len); + if (len > prefix_len) + (void)strncpy(&buf[prefix_len], WIREDTIGER_VERSION_STRING, + len - prefix_len); +} diff --git a/bench/workgen/workgen_func.h b/bench/workgen/workgen_func.h index 20ebf2632cc..ec7ecf0a504 100644 --- a/bench/workgen/workgen_func.h +++ b/bench/workgen/workgen_func.h @@ -42,3 +42,5 @@ extern void workgen_random_free(struct workgen_random_state *rnd_state); extern void workgen_u64_to_string_zf(uint64_t n, char *buf, size_t len); +extern void +workgen_version(char *buf, size_t len); diff --git a/bench/workgen/workgen_int.h b/bench/workgen/workgen_int.h index 01fb727691b..9283aea1d7b 100644 --- a/bench/workgen/workgen_int.h +++ b/bench/workgen/workgen_int.h @@ -146,6 +146,7 @@ struct Monitor { volatile bool _stop; pthread_t _handle; std::ostream *_out; + std::ostream *_json; Monitor(WorkloadRunner &wrunner); ~Monitor(); -- cgit v1.2.1 From b970000e3eb41c9b1f3ed7a9ce64ac8b62575a9f Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Fri, 9 Jun 2017 02:10:41 +1000 Subject: WT-3362 Checkpoints shouldn't block drops. (#3459) Testing has uncovered another case where drops can spin trying to lock a checkpoint handle until a checkpoint completes. This change fixes that in two ways: attempting to lock (but not open) a handle won't spin, and drop will always attempt to lock the live tree before locking any checkpoint handles. --- src/conn/conn_dhandle.c | 89 ++++++++++++++++++++++++++----------------- src/session/session_dhandle.c | 3 +- 2 files changed, 55 insertions(+), 37 deletions(-) diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index d4670562eb8..97fdc7557ee 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -480,6 +480,49 @@ err: WT_DHANDLE_RELEASE(dhandle); return (ret); } +/* + * __conn_dhandle_close_one -- + * Lock and, if necessary, close a data handle. 
+ */ +static int +__conn_dhandle_close_one(WT_SESSION_IMPL *session, + const char *uri, const char *checkpoint, bool force) +{ + WT_DECL_RET; + + /* + * Lock the handle exclusively. If this is part of schema-changing + * operation (indicated by metadata tracking being enabled), hold the + * lock for the duration of the operation. + */ + WT_RET(__wt_session_get_btree(session, uri, checkpoint, + NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY)); + if (WT_META_TRACKING(session)) + WT_RET(__wt_meta_track_handle_lock(session, false)); + + /* + * We have an exclusive lock, which means there are no cursors open at + * this point. Close the handle, if necessary. + */ + if (F_ISSET(session->dhandle, WT_DHANDLE_OPEN)) { + __wt_meta_track_sub_on(session); + ret = __wt_conn_btree_sync_and_close(session, false, force); + + /* + * If the close succeeded, drop any locks it acquired. If + * there was a failure, this function will fail and the whole + * transaction will be rolled back. + */ + if (ret == 0) + ret = __wt_meta_track_sub_off(session); + } + + if (!WT_META_TRACKING(session)) + WT_TRET(__wt_session_release_btree(session)); + + return (ret); +} + /* * __wt_conn_dhandle_close_all -- * Close all data handles handles with matching name (including all @@ -500,48 +543,22 @@ __wt_conn_dhandle_close_all( F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE)); WT_ASSERT(session, session->dhandle == NULL); + /* + * Lock the live handle first. This ordering is important: we rely on + * locking the live handle to fail fast if the tree is busy (e.g., with + * cursors open or in a checkpoint). + */ + WT_ERR(__conn_dhandle_close_one(session, uri, NULL, force)); + bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE; TAILQ_FOREACH(dhandle, &conn->dhhash[bucket], hashq) { if (strcmp(dhandle->name, uri) != 0 || + dhandle->checkpoint == NULL || F_ISSET(dhandle, WT_DHANDLE_DEAD)) continue; - session->dhandle = dhandle; - - /* - * Lock the handle exclusively. 
If this is part of - * schema-changing operation (indicated by metadata tracking - * being enabled), hold the lock for the duration of the - * operation. - */ - WT_ERR(__wt_session_get_btree(session, - dhandle->name, dhandle->checkpoint, - NULL, WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY)); - if (WT_META_TRACKING(session)) - WT_ERR(__wt_meta_track_handle_lock(session, false)); - - /* - * We have an exclusive lock, which means there are no cursors - * open at this point. Close the handle, if necessary. - */ - if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) { - __wt_meta_track_sub_on(session); - ret = __wt_conn_btree_sync_and_close( - session, false, force); - - /* - * If the close succeeded, drop any locks it acquired. - * If there was a failure, this function will fail and - * the whole transaction will be rolled back. - */ - if (ret == 0) - ret = __wt_meta_track_sub_off(session); - } - - if (!WT_META_TRACKING(session)) - WT_TRET(__wt_session_release_btree(session)); - - WT_ERR(ret); + WT_ERR(__conn_dhandle_close_one( + session, dhandle->name, dhandle->checkpoint, force)); } err: session->dhandle = NULL; diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c index 4565ae71896..dd2b6ef30ff 100644 --- a/src/session/session_dhandle.c +++ b/src/session/session_dhandle.c @@ -229,7 +229,8 @@ __wt_session_lock_dhandle( WT_ASSERT(session, !F_ISSET(dhandle, WT_DHANDLE_DEAD)); return (0); } - if (ret != EBUSY || (is_open && want_exclusive)) + if (ret != EBUSY || (is_open && want_exclusive) || + LF_ISSET(WT_DHANDLE_LOCK_ONLY)) return (ret); lock_busy = true; -- cgit v1.2.1 From 84429199fd94f8a8201c5aa77432a2557d326902 Mon Sep 17 00:00:00 2001 From: Don Anderson Date: Wed, 14 Jun 2017 11:29:28 -0400 Subject: WT-3369 WT_CURSOR->uri should always match the URI used to open the cursor (#3464) --- src/cursor/cur_table.c | 6 ++++++ src/lsm/lsm_cursor.c | 4 +--- test/suite/test_cursor01.py | 1 + 3 files changed, 8 insertions(+), 3 deletions(-) diff --git 
a/src/cursor/cur_table.c b/src/cursor/cur_table.c index 3959d58476b..000fcae99f2 100644 --- a/src/cursor/cur_table.c +++ b/src/cursor/cur_table.c @@ -988,6 +988,12 @@ __wt_curtable_open(WT_SESSION_IMPL *session, table->cgroups[0]->source, NULL, cfg, cursorp); __wt_schema_release_table(session, table); + if (ret == 0) { + /* Fix up the public URI to match what was passed in. */ + cursor = *cursorp; + __wt_free(session, cursor->uri); + WT_TRET(__wt_strdup(session, uri, &cursor->uri)); + } return (ret); } diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 99920367600..1d15ed793a2 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -1725,8 +1725,6 @@ __wt_clsm_close(WT_CURSOR *cursor) /* In case we were somehow left positioned, clear that. */ __clsm_leave(clsm); - /* The WT_LSM_TREE owns the URI. */ - cursor->uri = NULL; if (clsm->lsm_tree != NULL) __wt_lsm_tree_release(session, clsm->lsm_tree); WT_TRET(__wt_cursor_close(cursor)); @@ -1810,7 +1808,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session, cursor = &clsm->iface; *cursor = iface; cursor->session = &session->iface; - cursor->uri = lsm_tree->name; + WT_ERR(__wt_strdup(session, lsm_tree->name, &cursor->uri)); cursor->key_format = lsm_tree->key_format; cursor->value_format = lsm_tree->value_format; diff --git a/test/suite/test_cursor01.py b/test/suite/test_cursor01.py index 41b017aa882..99bdb6182c7 100644 --- a/test/suite/test_cursor01.py +++ b/test/suite/test_cursor01.py @@ -99,6 +99,7 @@ class test_cursor01(wttest.WiredTigerTestCase): self.pr('creating cursor') cursor = self.session.open_cursor(tablearg, None, None) self.assertCursorHasNoKeyValue(cursor) + self.assertEqual(cursor.uri, tablearg) for i in range(0, self.nentries): cursor[self.genkey(i)] = self.genvalue(i) -- cgit v1.2.1 From e560dbee85143ecc78c9e6b41b66af179a9f81a5 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 14 Jun 2017 15:57:18 -0400 Subject: SERVER-29439 WiredTiger turtle file "MoveFileExW: Access is denied." error. 
(#3460) Opening a metadata cursor, which reads the turtle file for the metadata file checkpoint, can race with a checkpoint renaming a new turtle file into place, and Windows doesn't support renaming a currently open file. --- src/meta/meta_table.c | 4 +++- src/meta/meta_turtle.c | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c index 65835a16c8b..326ad12bd33 100644 --- a/src/meta/meta_table.c +++ b/src/meta/meta_table.c @@ -267,7 +267,9 @@ __wt_metadata_search(WT_SESSION_IMPL *session, const char *key, char **valuep) * that Coverity complains a lot, add an error check to get some * peace and quiet. */ - if ((ret = __wt_turtle_read(session, key, valuep)) != 0) + WT_WITH_TURTLE_LOCK(session, + ret = __wt_turtle_read(session, key, valuep)); + if (ret != 0) __wt_free(session, *valuep); return (ret); } diff --git a/src/meta/meta_turtle.c b/src/meta/meta_turtle.c index 7a99df6b83b..362a3aa2bbe 100644 --- a/src/meta/meta_turtle.c +++ b/src/meta/meta_turtle.c @@ -246,6 +246,9 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) *valuep = NULL; + /* Require single-threading. */ + WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TURTLE)); + /* * Open the turtle file; there's one case where we won't find the turtle * file, yet still succeed. We create the metadata file before creating @@ -302,6 +305,9 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) fs = NULL; + /* Require single-threading. */ + WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TURTLE)); + /* * Create the turtle setup file: we currently re-write it from scratch * every time. 
-- cgit v1.2.1 From 625cbb33b9c39e6bb1fa00f21f3beb5573d645d6 Mon Sep 17 00:00:00 2001 From: Mark Benvenuto Date: Wed, 14 Jun 2017 16:51:20 -0400 Subject: WT-3367 Fix test linking to support macOS 10.12 (#3463) --- test/mciproject.yml | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/test/mciproject.yml b/test/mciproject.yml index 6456475aa00..72022fe46ec 100644 --- a/test/mciproject.yml +++ b/test/mciproject.yml @@ -95,6 +95,16 @@ tasks: script: | set -o errexit set -o verbose + + # On 10.12, change the binary location with install_name_tool since DYLD_LIBRARY_PATH + # appears not to work for dynamic modules loaded by python. For wt, the libtool generated + # script has the wrong path for running on test machines. + if [ "$(uname -s)" == "Darwin" ]; then + WT_VERSION=$(m4 build_posix/aclocal/version.m4) + install_name_tool -change /usr/local/lib/libwiredtiger-$WT_VERSION.dylib $(pwd)/.libs/libwiredtiger-$WT_VERSION.dylib lang/python/_wiredtiger.so + install_name_tool -change /usr/local/lib/libwiredtiger-$WT_VERSION.dylib $(pwd)/.libs/libwiredtiger-$WT_VERSION.dylib .libs/wt + fi + ${test_env_vars|} python ./test/suite/run.py -v 2 ${smp_command|} 2>&1 - name: compile-windows-alt @@ -182,10 +192,10 @@ buildvariants: #- name: format - Enable when we have a solution for hangs and crashses - name: fops -- name: osx-1010 - display_name: OS X 10.10 +- name: macos-1012 + display_name: OS X 10.12 run_on: - - osx-1010 + - macos-1012 expansions: smp_command: -j $(sysctl -n hw.logicalcpu) configure_env_vars: PATH=/opt/local/bin:$PATH @@ -195,3 +205,4 @@ buildvariants: - name: compile - name: unit-test - name: fops + -- cgit v1.2.1 From 15bf184909f8fc69c93404e93f20ad5dcbe3c2c2 Mon Sep 17 00:00:00 2001 From: sueloverso Date: Thu, 15 Jun 2017 10:51:19 -0400 Subject: WT-3370 Reset metafile after it could be re-allocated. (#3465) This fixes a potential memory access after free in transaction recovery. 
--- src/txn/txn_recover.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c index 590e17b6a2a..58f4f0750d7 100644 --- a/src/txn/txn_recover.c +++ b/src/txn/txn_recover.c @@ -458,6 +458,11 @@ __wt_txn_recover(WT_SESSION_IMPL *session) * larger than any checkpoint LSN we have from the earlier time. */ WT_ERR(__recovery_file_scan(&r)); + /* + * The array can be re-allocated in recovery_file_scan. Reset + * our pointer after scanning all the files. + */ + metafile = &r.files[WT_METAFILE_ID]; conn->next_file_id = r.max_fileid; if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) && @@ -509,6 +514,11 @@ __wt_txn_recover(WT_SESSION_IMPL *session) /* Scan the metadata to find the live files and their IDs. */ WT_ERR(__recovery_file_scan(&r)); + /* + * Clear this out. We no longer need it and it could have been + * re-allocated when scanning the files. + */ + metafile = NULL; /* * We no longer need the metadata cursor: close it to avoid pinning any -- cgit v1.2.1 From c455dcfd99c4311838a194df917b63ceb61876f3 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 16 Jun 2017 13:08:49 -0400 Subject: WT-3371 Make Windows/MSVC build warnings fatal. (#3466) The exception is the SWIG-generated Python wrapper code. 
--- SConstruct | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/SConstruct b/SConstruct index 22f869e02e7..2661807594d 100644 --- a/SConstruct +++ b/SConstruct @@ -67,15 +67,11 @@ var.Add('CPPPATH', 'C Preprocessor include path', [ ]) var.Add('CFLAGS', 'C Compiler Flags', [ - "/Z7", # Generate debugging symbols "/wd4090", # Ignore warning about mismatched const qualifiers "/wd4996", # Ignore deprecated functions "/W3", # Warning level 3 - #"/we4244", # Possible loss of data - "/we4013", # Error on undefined functions - #"/we4047", # Indirection differences in types - #"/we4024", # Differences in parameter types - #"/we4100", # Unreferenced local parameter + "/WX", # Warnings are fatal + "/Z7", # Generate debugging symbols "/TC", # Compile as C code #"/Od", # Disable optimization "/Ob1", # inline expansion @@ -338,6 +334,8 @@ if GetOption("lang-python"): "-nodefaultctor", "-nodefaultdtor", ]) + # Ignore warnings in swig-generated code. + pythonEnv['CFLAGS'].remove("/WX") swiglib = pythonEnv.SharedLibrary('_wiredtiger', [ 'lang\python\wiredtiger.i'], -- cgit v1.2.1 From a79e471dada3a1a1aa298575882df4f4fb3aafaa Mon Sep 17 00:00:00 2001 From: Sulabh Mahajan Date: Wed, 21 Jun 2017 09:27:00 +1000 Subject: WT-3251 remove interim buffer when splitting during reconciliation (#3453) * Don't initialize second buffer unless needed * When moving data from previous to current image, grow buffer if needed * Fix a bug caused by keeping pointer across realloc * Address Dave's comments * Address Keith's review comments * Address comments --- src/reconcile/rec_write.c | 407 +++++++++++++++++++++++++--------------------- 1 file changed, 226 insertions(+), 181 deletions(-) diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index f7df73c4ecb..4c79893bd94 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -25,12 +25,25 @@ typedef struct { WT_PAGE *page; uint32_t flags; /* Caller's configuration */ - WT_ITEM disk_image; /* 
Temporary disk-image buffer */ /* - * Temporary buffer used to write out a disk image when managing two - * chunks worth of data in memory - */ - WT_ITEM *interim_buf; + * Reconciliation can end up requiring two temporary disk image buffers + * if a page split is involved. These two disk images are pointed to by + * current and the previous image pointers. During initialization the + * first image is allocated and pointed to by the current image pointer. + * If and when a split is involved the second image gets allocated and + * is pointed to by the current image pointer. The previous image + * pointer is made to refer the first image at this point. Two images + * are kept in memory to redistribute data among them in case the last + * split chunk ends up being smaller than the minimum required. As + * reconciliation generates more split chunks, the image referred to by + * the previous image pointer is written to the disk, the current and + * the previous image pointers are swapped, making space for another + * split chunk to be reconciled in the buffer that was just written out + * to the disk. + */ + WT_ITEM disk_image[2]; /* Temporary disk-image buffers */ + WT_ITEM *cur_img_ptr; + WT_ITEM *prev_img_ptr; /* * Track start/stop write generation to decide if all changes to the @@ -146,17 +159,6 @@ typedef struct { * that references all of our split pages. */ struct __rec_boundary { - /* - * Offset is the byte offset in the initial split buffer of the - * first byte of the split chunk, recorded before we decide to - * split the page; the difference between chunk[1]'s offset and - * chunk[0]'s offset is chunk[0]'s length. - * - * Once we split a page, we stop filling in offset values, we're - * writing the split chunks as we find them. 
- */ - size_t offset; /* Split's first byte */ - WT_ADDR addr; /* Split's written location */ uint32_t size; /* Split's size */ uint32_t checksum; /* Split's checksum */ @@ -832,7 +834,8 @@ __rec_write_init(WT_SESSION_IMPL *session, r->last = &r->_last; /* Disk buffers need to be aligned for writing. */ - F_SET(&r->disk_image, WT_ITEM_ALIGNED); + F_SET(&r->disk_image[0], WT_ITEM_ALIGNED); + F_SET(&r->disk_image[1], WT_ITEM_ALIGNED); } /* Reconciliation is not re-entrant, make sure that doesn't happen. */ @@ -977,8 +980,8 @@ __rec_destroy(WT_SESSION_IMPL *session, void *reconcilep) return; *(WT_RECONCILE **)reconcilep = NULL; - __wt_buf_free(session, &r->disk_image); - __wt_scr_free(session, &r->interim_buf); + __wt_buf_free(session, &r->disk_image[0]); + __wt_buf_free(session, &r->disk_image[1]); __wt_free(session, r->raw_entries); __wt_free(session, r->raw_offsets); @@ -1766,7 +1769,7 @@ __rec_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size) */ WT_ASSERT(session, r->space_avail >= size); WT_ASSERT(session, WT_BLOCK_FITS( - r->first_free, size, r->disk_image.mem, r->disk_image.memsize)); + r->first_free, size, r->cur_img_ptr->mem, r->cur_img_ptr->memsize)); r->entries += v; r->space_avail -= size; @@ -1853,7 +1856,7 @@ __rec_dict_replace( * copy cell instead. */ if (dp->offset == 0) - dp->offset = WT_PTRDIFF32(r->first_free, r->disk_image.mem); + dp->offset = WT_PTRDIFF32(r->first_free, r->cur_img_ptr->mem); else { /* * The offset is the byte offset from this cell to the previous, @@ -1861,7 +1864,7 @@ __rec_dict_replace( * page. 
*/ offset = (uint64_t)WT_PTRDIFF(r->first_free, - (uint8_t *)r->disk_image.mem + dp->offset); + (uint8_t *)r->cur_img_ptr->mem + dp->offset); val->len = val->cell_len = __wt_cell_pack_copy(&val->cell, rle, offset); val->buf.data = NULL; @@ -1997,7 +2000,6 @@ __rec_leaf_page_max(WT_SESSION_IMPL *session, WT_RECONCILE *r) static void __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd) { - bnd->offset = 0; bnd->max_bnd_recno = WT_RECNO_OOB; bnd->max_bnd_entries = 0; @@ -2210,15 +2212,14 @@ __rec_split_init(WT_SESSION_IMPL *session, * Ensure the disk image buffer is large enough for the max object, as * corrected by the underlying block manager. * - * The buffer that we build disk image in, needs to hold two chunks - * worth of data. Since we want to support split_size more than the page - * size (to allow for adjustments based on the compression), this buffer - * should be greater of twice of split_size and page_size. + * Since we want to support split_size more than the page size (to allow + * for adjustments based on the compression), this buffer should be + * greater of split_size and page_size. */ corrected_page_size = r->page_size; - disk_img_buf_size = 2 * WT_MAX(corrected_page_size, r->split_size); WT_RET(bm->write_size(bm, session, &corrected_page_size)); - WT_RET(__wt_buf_init(session, &r->disk_image, disk_img_buf_size)); + disk_img_buf_size = WT_MAX(corrected_page_size, r->split_size); + WT_RET(__wt_buf_init(session, &r->disk_image[0], disk_img_buf_size)); /* * Clear the disk page header to ensure all of it is initialized, even @@ -2228,15 +2229,17 @@ __rec_split_init(WT_SESSION_IMPL *session, * fixed-length column-store sets bits in bytes, where the bytes are * assumed to initially be 0. */ - memset(r->disk_image.mem, 0, page->type == WT_PAGE_COL_FIX ? + memset(r->disk_image[0].mem, 0, page->type == WT_PAGE_COL_FIX ? 
disk_img_buf_size : WT_PAGE_HEADER_SIZE); /* * Set the page type (the type doesn't change, and setting it later * would require additional code in a few different places). */ - dsk = r->disk_image.mem; + dsk = r->disk_image[0].mem; dsk->type = page->type; + r->cur_img_ptr = &r->disk_image[0]; + r->prev_img_ptr = NULL; r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); @@ -2245,7 +2248,6 @@ __rec_split_init(WT_SESSION_IMPL *session, WT_RET(__rec_split_bnd_grow(session, r)); __rec_split_bnd_init(session, &r->bnd[0]); r->bnd[0].max_bnd_recno = recno; - r->bnd[0].offset = WT_PAGE_HEADER_BYTE_SIZE(btree); /* Initialize the entry counter. */ r->entries = 0; @@ -2451,21 +2453,18 @@ __rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len) { WT_BM *bm; WT_BTREE *btree; - size_t corrected_page_size, inuse, len; + size_t corrected_page_size, inuse; btree = S2BT(session); bm = btree->bm; - len = WT_PTRDIFF(r->first_free, r->disk_image.mem); - inuse = (len - r->bnd[r->bnd_next].offset) + - WT_PAGE_HEADER_BYTE_SIZE(btree); + inuse = WT_PTRDIFF(r->first_free, r->cur_img_ptr->mem); corrected_page_size = inuse + add_len; WT_RET(bm->write_size(bm, session, &corrected_page_size)); - /* Need to account for buffer carrying two chunks worth of data */ - WT_RET(__wt_buf_grow(session, &r->disk_image, 2 * corrected_page_size)); + WT_RET(__wt_buf_grow(session, r->cur_img_ptr, corrected_page_size)); - r->first_free = (uint8_t *)r->disk_image.mem + len; + r->first_free = (uint8_t *)r->cur_img_ptr->mem + inuse; WT_ASSERT(session, corrected_page_size >= inuse); r->space_avail = corrected_page_size - inuse; WT_ASSERT(session, r->space_avail >= add_len); @@ -2474,89 +2473,55 @@ __rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len) } /* - * __rec_split_write_prev_and_shift_cur -- - * Write the previous split chunk to the disk as a page. Shift the contents - * of the current chunk to the start of the buffer, making space for a new - * chunk to be written. 
- * If the caller asks for a chunk resizing, the boundary between the two - * chunks is readjusted to the minimum split size boundary details stored - * in the previous chunk, letting the current chunk grow at the cost of the - * previous chunk. + * __rec_split_write_prev_and_swap_buf -- + * If there is a previous split chunk held in the memory, write it to the + * disk as a page. If there isn't one, this is the first time we are + * splitting and need to initialize a second buffer. Also, swap the + * previous and the current buffer pointers. */ static int -__rec_split_write_prev_and_shift_cur( - WT_SESSION_IMPL *session, WT_RECONCILE *r, bool resize_chunks) +__rec_split_write_prev_and_swap_buf(WT_SESSION_IMPL *session, WT_RECONCILE *r) { - WT_BM *bm; - WT_BOUNDARY *bnd_cur, *bnd_prev; - WT_BTREE *btree; - WT_PAGE_HEADER *dsk, *dsk_tmp; - size_t cur_len, len; - uint8_t *dsk_start; - - WT_ASSERT(session, r->bnd_next != 0); - - btree = S2BT(session); - bm = btree->bm; - bnd_cur = &r->bnd[r->bnd_next]; - bnd_prev = bnd_cur - 1; - dsk = r->disk_image.mem; - cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset; - - /* - * Resize chunks if the current is smaller than the minimum, and there - * are details on the minimum split size boundary available in the - * previous boundary details. - * - * There is a possibility that we do not have a minimum boundary set, in - * such a case we skip chunk resizing. Such a condition is possible for - * instance when we are building the image in the buffer and the first - * K/V pair is large enough that it surpasses both the minimum split - * size and the split size the application has set. In such a case we - * split the chunk without saving any minimum boundary. 
- */ - if (resize_chunks && - cur_len < r->min_split_size && bnd_prev->min_bnd_offset != 0) { - bnd_cur->offset = bnd_prev->min_bnd_offset; - bnd_cur->max_bnd_entries += - bnd_prev->max_bnd_entries - bnd_prev->min_bnd_entries; - bnd_prev->max_bnd_entries = bnd_prev->min_bnd_entries; - bnd_cur->max_bnd_recno = bnd_prev->min_bnd_recno; - - WT_RET(__wt_buf_set(session, &bnd_cur->max_bnd_key, - bnd_prev->min_bnd_key.data, bnd_prev->min_bnd_key.size)); - - /* Update current chunk's length */ - cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset; + WT_BOUNDARY *bnd_prev; + WT_ITEM *tmp_img_ptr; + WT_PAGE_HEADER *dsk; + size_t disk_img_size; + + WT_ASSERT(session, (r->prev_img_ptr == NULL && r->bnd_next == 0) || + (r->prev_img_ptr != NULL && r->bnd_next != 0)); + + /* Write previous chunk, if there is one */ + if (r->prev_img_ptr != NULL) { + bnd_prev = &r->bnd[r->bnd_next - 1]; + dsk = r->prev_img_ptr->mem; + dsk->recno = bnd_prev->max_bnd_recno; + dsk->u.entries = bnd_prev->max_bnd_entries; + dsk->mem_size = (uint32_t)bnd_prev->size; + r->prev_img_ptr->size = dsk->mem_size; + WT_RET(__rec_split_write(session, + r, bnd_prev, r->prev_img_ptr, false)); + } else { + /* + * If we do not have a previous buffer, we should initialize the + * second buffer before proceeding. We will create the second + * buffer of the same size as the current buffer. + */ + disk_img_size = r->cur_img_ptr->memsize; + WT_RET(__wt_buf_init(session, + &r->disk_image[1], disk_img_size)); + r->prev_img_ptr = &r->disk_image[1]; + dsk = r->prev_img_ptr->mem; + memset(dsk, 0, + r->page->type == WT_PAGE_COL_FIX ? + disk_img_size : WT_PAGE_HEADER_SIZE); + dsk->type = r->page->type; } - /* - * Create an interim buffer if not already done to prepare the previous - * chunk's disk image. 
- */ - len = bnd_cur->offset; - WT_RET(bm->write_size(bm, session, &len)); - if (r->interim_buf == NULL) - WT_RET(__wt_scr_alloc(session, len, &r->interim_buf)); - else - WT_RET(__wt_buf_init(session, r->interim_buf, len)); - - dsk_tmp = r->interim_buf->mem; - memcpy(dsk_tmp, dsk, bnd_cur->offset); - dsk_tmp->recno = bnd_prev->max_bnd_recno; - dsk_tmp->u.entries = bnd_prev->max_bnd_entries; - dsk_tmp->mem_size = WT_STORE_SIZE(bnd_cur->offset); - r->interim_buf->size = dsk_tmp->mem_size; - WT_RET(__rec_split_write(session, r, bnd_prev, r->interim_buf, false)); - - /* Shift the current chunk to the start of the buffer */ - dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); - (void)memmove(dsk_start, (uint8_t *)dsk + bnd_cur->offset, cur_len); - - /* Fix boundary offset */ - bnd_cur->offset = WT_PAGE_HEADER_BYTE_SIZE(btree); - /* Fix where free points */ - r->first_free = dsk_start + cur_len; + /* swap previous and current buffers */ + tmp_img_ptr = r->prev_img_ptr; + r->prev_img_ptr = r->cur_img_ptr; + r->cur_img_ptr = tmp_img_ptr; + return (0); } @@ -2574,7 +2539,7 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) size_t inuse; btree = S2BT(session); - dsk = r->disk_image.mem; + dsk = r->cur_img_ptr->mem; /* Fixed length col store can call with next_len 0 */ WT_ASSERT(session, next_len == 0 || r->space_avail < next_len); @@ -2588,9 +2553,7 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) "%s page too large, attempted split during salvage", __wt_page_type_string(r->page->type)); - last = &r->bnd[r->bnd_next]; - inuse = (WT_PTRDIFF(r->first_free, dsk) - last->offset) + - WT_PAGE_HEADER_BYTE_SIZE(btree); + inuse = WT_PTRDIFF(r->first_free, dsk); /* * We can get here if the first key/value pair won't fit. @@ -2603,8 +2566,10 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) /* All page boundaries reset the dictionary. 
*/ __rec_dictionary_reset(r); - /* Set the number of entries for the just finished chunk. */ + /* Set the number of entries and size for the just finished chunk. */ + last = &r->bnd[r->bnd_next]; last->max_bnd_entries = r->entries; + last->size = (uint32_t)inuse; /* * In case of bulk load, write out chunks as we get them. Otherwise we @@ -2616,19 +2581,22 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) dsk->recno = last->max_bnd_recno; dsk->u.entries = last->max_bnd_entries; dsk->mem_size = (uint32_t)inuse; - r->disk_image.size = dsk->mem_size; - WT_RET(__rec_split_write( - session, r, last, &r->disk_image, false)); - /* Fix where free points */ - r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); - } else if (r->bnd_next != 0) - WT_RET(__rec_split_write_prev_and_shift_cur(session, r, false)); + r->cur_img_ptr->size = dsk->mem_size; + WT_RET(__rec_split_write(session, + r, last, r->cur_img_ptr, false)); + } else { + WT_RET(__rec_split_write_prev_and_swap_buf(session, r)); + /* current image we are writing to has changed */ + dsk = r->cur_img_ptr->mem; + } + + /* Fix where free points */ + r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); /* Prepare the next boundary */ WT_RET(__rec_split_bnd_grow(session, r)); r->bnd_next++; next = &r->bnd[r->bnd_next]; - next->offset = WT_PTRDIFF(r->first_free, dsk); /* Set the key for the next chunk. 
*/ next->max_bnd_recno = r->recno; if (dsk->type == WT_PAGE_ROW_INT || dsk->type == WT_PAGE_ROW_LEAF) @@ -2687,9 +2655,8 @@ __rec_split_crossing_bnd( !WT_CROSSING_SPLIT_BND(r, next_len)) { btree = S2BT(session); bnd = &r->bnd[r->bnd_next]; - dsk = r->disk_image.mem; - min_bnd_offset = (WT_PTRDIFF(r->first_free, dsk) - - bnd->offset) + WT_PAGE_HEADER_BYTE_SIZE(btree); + dsk = r->cur_img_ptr->mem; + min_bnd_offset = WT_PTRDIFF(r->first_free, dsk); if (min_bnd_offset == WT_PAGE_HEADER_BYTE_SIZE(btree)) /* * This is possible if the first record doesn't fit in @@ -2750,7 +2717,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, unpack = &_unpack; compressor = btree->compressor; dst = &r->raw_destination; - dsk = r->disk_image.mem; + dsk = r->cur_img_ptr->mem; WT_RET(__rec_split_bnd_grow(session, r)); last = &r->bnd[r->bnd_next]; @@ -3066,7 +3033,7 @@ no_slots: r->first_free = dsk_start + len; r->space_avail += r->raw_offsets[result_slots]; WT_ASSERT(session, r->first_free + r->space_avail <= - (uint8_t *)r->disk_image.mem + r->disk_image.memsize); + (uint8_t *)r->cur_img_ptr->mem + r->cur_img_ptr->memsize); /* * Set the key for the next block (before writing the block, a @@ -3105,13 +3072,13 @@ no_slots: dsk->recno = last->max_bnd_recno; dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); dsk->u.entries = r->entries; - r->disk_image.size = dsk->mem_size; + r->cur_img_ptr->size = dsk->mem_size; r->entries = 0; r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); r->space_avail = r->page_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - write_ref = &r->disk_image; + write_ref = r->cur_img_ptr; last->already_compressed = false; } else { /* @@ -3139,7 +3106,7 @@ no_slots: last_block && __rec_is_checkpoint(session, r, last)) { if (write_ref == dst) WT_RET(__wt_buf_set( - session, &r->disk_image, dst->mem, dst->size)); + session, r->cur_img_ptr, dst->mem, dst->size)); } else WT_RET( __rec_split_write(session, r, last, write_ref, last_block)); @@ -3172,6 +3139,111 @@ 
__rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) return (__rec_split_raw_worker(session, r, next_len, false)); } +/* + * __rec_split_finish_process_prev -- + * If the two split chunks together fit in a single page, merge them into + * one. If they do not fit in a single page but the last is smaller than + * the minimum desired, move some data from the penultimate chunk to the + * last chunk and write out the previous/penultimate. Finally, update the + * pointer to the current image buffer. After this function exits, we will + * have one (last) buffer in memory, pointed to by the current image + * pointer. + */ +static int +__rec_split_finish_process_prev( + WT_SESSION_IMPL *session, WT_RECONCILE *r, bool *chunks_merged) +{ + WT_BOUNDARY *bnd_cur, *bnd_prev; + WT_BTREE *btree; + WT_PAGE_HEADER *dsk; + size_t len_to_move; + uint32_t combined_size; + uint8_t *cur_dsk_start; + + WT_ASSERT(session, r->prev_img_ptr != NULL); + + btree = S2BT(session); + bnd_cur = &r->bnd[r->bnd_next]; + bnd_prev = bnd_cur - 1; + *chunks_merged = false; + /* + * The sizes referred to in the boundary structure include the header, + * so when calculating the combined size, make sure not to include the + * header twice. + */ + combined_size = bnd_prev->size + + (bnd_cur->size - WT_PAGE_HEADER_BYTE_SIZE(btree)); + + if (combined_size <= r->page_size) { + /* + * We have two boundaries, but the data in the buffers can fit a + * single page. Merge the boundaries and create a single chunk. 
+ */ + dsk = r->cur_img_ptr->mem; + memcpy((uint8_t *)r->prev_img_ptr->mem + bnd_prev->size, + WT_PAGE_HEADER_BYTE(btree, dsk), + bnd_cur->size - WT_PAGE_HEADER_BYTE_SIZE(btree)); + bnd_prev->size = combined_size; + bnd_prev->max_bnd_entries += bnd_cur->max_bnd_entries; + r->bnd_next--; + *chunks_merged = true; + } else { + if (bnd_cur->size < r->min_split_size && + bnd_prev->min_bnd_offset != 0 ) { + /* + * The last chunk, pointed to by the current image + * pointer, has less than the minimum data. Let's move + * any data more than the minimum from the previous + * image into the current. + */ + len_to_move = bnd_prev->size - bnd_prev->min_bnd_offset; + /* Grow current buffer if it is not large enough */ + if (r->space_avail < len_to_move) + WT_RET(__rec_split_grow(session, + r, len_to_move)); + cur_dsk_start = WT_PAGE_HEADER_BYTE(btree, + r->cur_img_ptr->mem); + + /* + * Shift the contents of the current buffer to make + * space for the data that will be prepended into the + * current buffer + */ + memmove(cur_dsk_start + len_to_move, + cur_dsk_start, bnd_cur->size - + WT_PAGE_HEADER_BYTE_SIZE(btree)); + /* + * copy any data more than the minimum, from the + * previous buffer to the start of the current. + */ + memcpy(cur_dsk_start, (uint8_t *)r->prev_img_ptr->mem + + bnd_prev->min_bnd_offset, len_to_move); + + /* Update boundary information */ + bnd_cur->size += len_to_move; + bnd_prev->size -= len_to_move; + bnd_cur->max_bnd_entries += bnd_prev->max_bnd_entries - + bnd_prev->min_bnd_entries; + bnd_prev->max_bnd_entries = bnd_prev->min_bnd_entries; + bnd_cur->max_bnd_recno = bnd_prev->min_bnd_recno; + WT_RET(__wt_buf_set(session, + &bnd_cur->max_bnd_key, bnd_prev->min_bnd_key.data, + bnd_prev->min_bnd_key.size)); + } + + /* Write out the previous image */ + WT_RET(__rec_split_write_prev_and_swap_buf(session, r)); + } + + /* + * At this point, there is only one disk image in the memory, pointed to + * by the previous image pointer. 
Update the current image pointer to + * this image. + */ + r->cur_img_ptr = r->prev_img_ptr; + return (0); +} + /* * __rec_split_finish_std -- * Finish processing a page, standard version. @@ -3179,9 +3251,9 @@ __rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) static int __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) { - WT_BOUNDARY *bnd_cur, *bnd_prev; + WT_BOUNDARY *bnd_cur; WT_PAGE_HEADER *dsk; - bool grow_bnd; + bool chunks_merged; /* * We may arrive here with no entries to write if the page was entirely @@ -3208,50 +3280,22 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) return (EBUSY); } - dsk = r->disk_image.mem; - - /* Set the number of entries for the just finished chunk. */ + /* Set the number of entries and size for the just finished chunk. */ bnd_cur = &r->bnd[r->bnd_next]; bnd_cur->max_bnd_entries = r->entries; + bnd_cur->size = WT_PTRDIFF32(r->first_free, r->cur_img_ptr->mem); - grow_bnd = true; - /* - * We can reach here even with raw_compression when the last split chunk - * is too small to be sent for raw compression. - */ - if (!r->is_bulk_load && !r->raw_compression) { - if (WT_PTRDIFF(r->first_free, dsk) > r->page_size && - r->bnd_next != 0) { - /* - * We hold two boundaries worth of data in the buffer, - * and this data doesn't fit in a single page. If the - * last chunk is too small, readjust the boundary to a - * pre-computed minimum. - * Write out the penultimate chunk to the disk as a page - */ - WT_RET(__rec_split_write_prev_and_shift_cur( - session, r, true)); - } else - if (r->bnd_next != 0) { - /* - * We have two boundaries, but the data in the - * buffer can fit a single page. Merge the - * boundaries to create a single chunk. 
- */ - bnd_prev = bnd_cur - 1; - bnd_prev->max_bnd_entries += - bnd_cur->max_bnd_entries; - r->bnd_next--; - grow_bnd = false; - } - } + chunks_merged = false; + if (r->prev_img_ptr != NULL) + WT_RET(__rec_split_finish_process_prev(session, + r, &chunks_merged)); /* * We already have space for an extra boundary if we merged two * boundaries above, in that case we do not need to grow the boundary * structure. */ - if (grow_bnd) + if (!chunks_merged) WT_RET(__rec_split_bnd_grow(session, r)); bnd_cur = &r->bnd[r->bnd_next]; r->bnd_next++; @@ -3260,14 +3304,15 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) * Current boundary now has all the remaining data/last page now. * Let's write it to the disk */ + dsk = r->cur_img_ptr->mem; dsk->recno = bnd_cur->max_bnd_recno; dsk->u.entries = bnd_cur->max_bnd_entries; - dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); - r->disk_image.size = dsk->mem_size; + dsk->mem_size = bnd_cur->size; + r->cur_img_ptr->size = dsk->mem_size; /* If this is a checkpoint, we're done, otherwise write the page. */ return (__rec_is_checkpoint(session, r, bnd_cur) ? - 0 : __rec_split_write(session, r, bnd_cur, &r->disk_image, true)); + 0 : __rec_split_write(session, r, bnd_cur, r->cur_img_ptr, true)); } /* @@ -3289,7 +3334,7 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) if (r->raw_compression && r->entries != 0) { while (r->entries != 0) { data_size = - WT_PTRDIFF(r->first_free, r->disk_image.mem); + WT_PTRDIFF(r->first_free, r->cur_img_ptr->mem); if (data_size <= btree->allocsize) break; WT_RET(__rec_split_raw_worker(session, r, 0, true)); @@ -5882,7 +5927,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) * write the buffer so we know what to do here. 
*/ if (bnd->addr.addr == NULL) - WT_RET(__wt_bt_write(session, &r->disk_image, + WT_RET(__wt_bt_write(session, r->cur_img_ptr, NULL, NULL, true, F_ISSET(r, WT_CHECKPOINTING), bnd->already_compressed)); else { @@ -6546,7 +6591,7 @@ __rec_dictionary_lookup( for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash); dp != NULL && dp->hash == hash; dp = dp->next[0]) { WT_RET(__wt_cell_pack_data_match( - (WT_CELL *)((uint8_t *)r->disk_image.mem + dp->offset), + (WT_CELL *)((uint8_t *)r->cur_img_ptr->mem + dp->offset), &val->cell, val->buf.data, &match)); if (match) { WT_STAT_DATA_INCR(session, rec_dictionary); -- cgit v1.2.1 From 80e031fb1b484a9786d921670ae0742c422c4a59 Mon Sep 17 00:00:00 2001 From: Sulabh Mahajan Date: Wed, 21 Jun 2017 10:25:08 +1000 Subject: Revert "WT-3251 remove interim buffer when splitting during reconciliation (#3453)" (#3468) This reverts commit a79e471dada3a1a1aa298575882df4f4fb3aafaa. --- src/reconcile/rec_write.c | 407 +++++++++++++++++++++------------------------- 1 file changed, 181 insertions(+), 226 deletions(-) diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 4c79893bd94..f7df73c4ecb 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -25,25 +25,12 @@ typedef struct { WT_PAGE *page; uint32_t flags; /* Caller's configuration */ + WT_ITEM disk_image; /* Temporary disk-image buffer */ /* - * Reconciliation can end up requiring two temporary disk image buffers - * if a page split is involved. These two disk images are pointed to by - * current and the previous image pointers. During initialization the - * first image is allocated and pointed to by the current image pointer. - * If and when a split is involved the second image gets allocated and - * is pointed to by the current image pointer. The previous image - * pointer is made to refer the first image at this point. 
Two images - * are kept in memory to redistribute data among them in case the last - * split chunk ends up being smaller than the minimum required. As - * reconciliation generates more split chunks, the image referred to by - * the previous image pointer is written to the disk, the current and - * the previous image pointers are swapped, making space for another - * split chunk to be reconciled in the buffer that was just written out - * to the disk. - */ - WT_ITEM disk_image[2]; /* Temporary disk-image buffers */ - WT_ITEM *cur_img_ptr; - WT_ITEM *prev_img_ptr; + * Temporary buffer used to write out a disk image when managing two + * chunks worth of data in memory + */ + WT_ITEM *interim_buf; /* * Track start/stop write generation to decide if all changes to the @@ -159,6 +146,17 @@ typedef struct { * that references all of our split pages. */ struct __rec_boundary { + /* + * Offset is the byte offset in the initial split buffer of the + * first byte of the split chunk, recorded before we decide to + * split the page; the difference between chunk[1]'s offset and + * chunk[0]'s offset is chunk[0]'s length. + * + * Once we split a page, we stop filling in offset values, we're + * writing the split chunks as we find them. + */ + size_t offset; /* Split's first byte */ + WT_ADDR addr; /* Split's written location */ uint32_t size; /* Split's size */ uint32_t checksum; /* Split's checksum */ @@ -834,8 +832,7 @@ __rec_write_init(WT_SESSION_IMPL *session, r->last = &r->_last; /* Disk buffers need to be aligned for writing. */ - F_SET(&r->disk_image[0], WT_ITEM_ALIGNED); - F_SET(&r->disk_image[1], WT_ITEM_ALIGNED); + F_SET(&r->disk_image, WT_ITEM_ALIGNED); } /* Reconciliation is not re-entrant, make sure that doesn't happen. 
*/ @@ -980,8 +977,8 @@ __rec_destroy(WT_SESSION_IMPL *session, void *reconcilep) return; *(WT_RECONCILE **)reconcilep = NULL; - __wt_buf_free(session, &r->disk_image[0]); - __wt_buf_free(session, &r->disk_image[1]); + __wt_buf_free(session, &r->disk_image); + __wt_scr_free(session, &r->interim_buf); __wt_free(session, r->raw_entries); __wt_free(session, r->raw_offsets); @@ -1769,7 +1766,7 @@ __rec_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size) */ WT_ASSERT(session, r->space_avail >= size); WT_ASSERT(session, WT_BLOCK_FITS( - r->first_free, size, r->cur_img_ptr->mem, r->cur_img_ptr->memsize)); + r->first_free, size, r->disk_image.mem, r->disk_image.memsize)); r->entries += v; r->space_avail -= size; @@ -1856,7 +1853,7 @@ __rec_dict_replace( * copy cell instead. */ if (dp->offset == 0) - dp->offset = WT_PTRDIFF32(r->first_free, r->cur_img_ptr->mem); + dp->offset = WT_PTRDIFF32(r->first_free, r->disk_image.mem); else { /* * The offset is the byte offset from this cell to the previous, @@ -1864,7 +1861,7 @@ __rec_dict_replace( * page. */ offset = (uint64_t)WT_PTRDIFF(r->first_free, - (uint8_t *)r->cur_img_ptr->mem + dp->offset); + (uint8_t *)r->disk_image.mem + dp->offset); val->len = val->cell_len = __wt_cell_pack_copy(&val->cell, rle, offset); val->buf.data = NULL; @@ -2000,6 +1997,7 @@ __rec_leaf_page_max(WT_SESSION_IMPL *session, WT_RECONCILE *r) static void __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd) { + bnd->offset = 0; bnd->max_bnd_recno = WT_RECNO_OOB; bnd->max_bnd_entries = 0; @@ -2212,14 +2210,15 @@ __rec_split_init(WT_SESSION_IMPL *session, * Ensure the disk image buffer is large enough for the max object, as * corrected by the underlying block manager. * - * Since we want to support split_size more than the page size (to allow - * for adjustments based on the compression), this buffer should be - * greater of split_size and page_size. 
+ * The buffer that we build disk image in, needs to hold two chunks + * worth of data. Since we want to support split_size more than the page + * size (to allow for adjustments based on the compression), this buffer + * should be greater of twice of split_size and page_size. */ corrected_page_size = r->page_size; + disk_img_buf_size = 2 * WT_MAX(corrected_page_size, r->split_size); WT_RET(bm->write_size(bm, session, &corrected_page_size)); - disk_img_buf_size = WT_MAX(corrected_page_size, r->split_size); - WT_RET(__wt_buf_init(session, &r->disk_image[0], disk_img_buf_size)); + WT_RET(__wt_buf_init(session, &r->disk_image, disk_img_buf_size)); /* * Clear the disk page header to ensure all of it is initialized, even @@ -2229,17 +2228,15 @@ __rec_split_init(WT_SESSION_IMPL *session, * fixed-length column-store sets bits in bytes, where the bytes are * assumed to initially be 0. */ - memset(r->disk_image[0].mem, 0, page->type == WT_PAGE_COL_FIX ? + memset(r->disk_image.mem, 0, page->type == WT_PAGE_COL_FIX ? disk_img_buf_size : WT_PAGE_HEADER_SIZE); /* * Set the page type (the type doesn't change, and setting it later * would require additional code in a few different places). */ - dsk = r->disk_image[0].mem; + dsk = r->disk_image.mem; dsk->type = page->type; - r->cur_img_ptr = &r->disk_image[0]; - r->prev_img_ptr = NULL; r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); @@ -2248,6 +2245,7 @@ __rec_split_init(WT_SESSION_IMPL *session, WT_RET(__rec_split_bnd_grow(session, r)); __rec_split_bnd_init(session, &r->bnd[0]); r->bnd[0].max_bnd_recno = recno; + r->bnd[0].offset = WT_PAGE_HEADER_BYTE_SIZE(btree); /* Initialize the entry counter. 
*/ r->entries = 0; @@ -2453,18 +2451,21 @@ __rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len) { WT_BM *bm; WT_BTREE *btree; - size_t corrected_page_size, inuse; + size_t corrected_page_size, inuse, len; btree = S2BT(session); bm = btree->bm; - inuse = WT_PTRDIFF(r->first_free, r->cur_img_ptr->mem); + len = WT_PTRDIFF(r->first_free, r->disk_image.mem); + inuse = (len - r->bnd[r->bnd_next].offset) + + WT_PAGE_HEADER_BYTE_SIZE(btree); corrected_page_size = inuse + add_len; WT_RET(bm->write_size(bm, session, &corrected_page_size)); - WT_RET(__wt_buf_grow(session, r->cur_img_ptr, corrected_page_size)); + /* Need to account for buffer carrying two chunks worth of data */ + WT_RET(__wt_buf_grow(session, &r->disk_image, 2 * corrected_page_size)); - r->first_free = (uint8_t *)r->cur_img_ptr->mem + inuse; + r->first_free = (uint8_t *)r->disk_image.mem + len; WT_ASSERT(session, corrected_page_size >= inuse); r->space_avail = corrected_page_size - inuse; WT_ASSERT(session, r->space_avail >= add_len); @@ -2473,55 +2474,89 @@ __rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len) } /* - * __rec_split_write_prev_and_swap_buf -- - * If there is a previous split chunk held in the memory, write it to the - * disk as a page. If there isn't one, this is the first time we are - * splitting and need to initialize a second buffer. Also, swap the - * previous and the current buffer pointers. + * __rec_split_write_prev_and_shift_cur -- + * Write the previous split chunk to the disk as a page. Shift the contents + * of the current chunk to the start of the buffer, making space for a new + * chunk to be written. + * If the caller asks for a chunk resizing, the boundary between the two + * chunks is readjusted to the minimum split size boundary details stored + * in the previous chunk, letting the current chunk grow at the cost of the + * previous chunk. 
*/ static int -__rec_split_write_prev_and_swap_buf(WT_SESSION_IMPL *session, WT_RECONCILE *r) +__rec_split_write_prev_and_shift_cur( + WT_SESSION_IMPL *session, WT_RECONCILE *r, bool resize_chunks) { - WT_BOUNDARY *bnd_prev; - WT_ITEM *tmp_img_ptr; - WT_PAGE_HEADER *dsk; - size_t disk_img_size; - - WT_ASSERT(session, (r->prev_img_ptr == NULL && r->bnd_next == 0) || - (r->prev_img_ptr != NULL && r->bnd_next != 0)); - - /* Write previous chunk, if there is one */ - if (r->prev_img_ptr != NULL) { - bnd_prev = &r->bnd[r->bnd_next - 1]; - dsk = r->prev_img_ptr->mem; - dsk->recno = bnd_prev->max_bnd_recno; - dsk->u.entries = bnd_prev->max_bnd_entries; - dsk->mem_size = (uint32_t)bnd_prev->size; - r->prev_img_ptr->size = dsk->mem_size; - WT_RET(__rec_split_write(session, - r, bnd_prev, r->prev_img_ptr, false)); - } else { - /* - * If we do not have a previous buffer, we should initialize the - * second buffer before proceeding. We will create the second - * buffer of the same size as the current buffer. - */ - disk_img_size = r->cur_img_ptr->memsize; - WT_RET(__wt_buf_init(session, - &r->disk_image[1], disk_img_size)); - r->prev_img_ptr = &r->disk_image[1]; - dsk = r->prev_img_ptr->mem; - memset(dsk, 0, - r->page->type == WT_PAGE_COL_FIX ? 
- disk_img_size : WT_PAGE_HEADER_SIZE); - dsk->type = r->page->type; - } + WT_BM *bm; + WT_BOUNDARY *bnd_cur, *bnd_prev; + WT_BTREE *btree; + WT_PAGE_HEADER *dsk, *dsk_tmp; + size_t cur_len, len; + uint8_t *dsk_start; + + WT_ASSERT(session, r->bnd_next != 0); + + btree = S2BT(session); + bm = btree->bm; + bnd_cur = &r->bnd[r->bnd_next]; + bnd_prev = bnd_cur - 1; + dsk = r->disk_image.mem; + cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset; - /* swap previous and current buffers */ - tmp_img_ptr = r->prev_img_ptr; - r->prev_img_ptr = r->cur_img_ptr; - r->cur_img_ptr = tmp_img_ptr; + /* + * Resize chunks if the current is smaller than the minimum, and there + * are details on the minimum split size boundary available in the + * previous boundary details. + * + * There is a possibility that we do not have a minimum boundary set, in + * such a case we skip chunk resizing. Such a condition is possible for + * instance when we are building the image in the buffer and the first + * K/V pair is large enough that it surpasses both the minimum split + * size and the split size the application has set. In such a case we + * split the chunk without saving any minimum boundary. + */ + if (resize_chunks && + cur_len < r->min_split_size && bnd_prev->min_bnd_offset != 0) { + bnd_cur->offset = bnd_prev->min_bnd_offset; + bnd_cur->max_bnd_entries += + bnd_prev->max_bnd_entries - bnd_prev->min_bnd_entries; + bnd_prev->max_bnd_entries = bnd_prev->min_bnd_entries; + bnd_cur->max_bnd_recno = bnd_prev->min_bnd_recno; + + WT_RET(__wt_buf_set(session, &bnd_cur->max_bnd_key, + bnd_prev->min_bnd_key.data, bnd_prev->min_bnd_key.size)); + + /* Update current chunk's length */ + cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset; + } + /* + * Create an interim buffer if not already done to prepare the previous + * chunk's disk image. 
+ */ + len = bnd_cur->offset; + WT_RET(bm->write_size(bm, session, &len)); + if (r->interim_buf == NULL) + WT_RET(__wt_scr_alloc(session, len, &r->interim_buf)); + else + WT_RET(__wt_buf_init(session, r->interim_buf, len)); + + dsk_tmp = r->interim_buf->mem; + memcpy(dsk_tmp, dsk, bnd_cur->offset); + dsk_tmp->recno = bnd_prev->max_bnd_recno; + dsk_tmp->u.entries = bnd_prev->max_bnd_entries; + dsk_tmp->mem_size = WT_STORE_SIZE(bnd_cur->offset); + r->interim_buf->size = dsk_tmp->mem_size; + WT_RET(__rec_split_write(session, r, bnd_prev, r->interim_buf, false)); + + /* Shift the current chunk to the start of the buffer */ + dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); + (void)memmove(dsk_start, (uint8_t *)dsk + bnd_cur->offset, cur_len); + + /* Fix boundary offset */ + bnd_cur->offset = WT_PAGE_HEADER_BYTE_SIZE(btree); + /* Fix where free points */ + r->first_free = dsk_start + cur_len; return (0); } @@ -2539,7 +2574,7 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) size_t inuse; btree = S2BT(session); - dsk = r->cur_img_ptr->mem; + dsk = r->disk_image.mem; /* Fixed length col store can call with next_len 0 */ WT_ASSERT(session, next_len == 0 || r->space_avail < next_len); @@ -2553,7 +2588,9 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) "%s page too large, attempted split during salvage", __wt_page_type_string(r->page->type)); - inuse = WT_PTRDIFF(r->first_free, dsk); + last = &r->bnd[r->bnd_next]; + inuse = (WT_PTRDIFF(r->first_free, dsk) - last->offset) + + WT_PAGE_HEADER_BYTE_SIZE(btree); /* * We can get here if the first key/value pair won't fit. @@ -2566,10 +2603,8 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) /* All page boundaries reset the dictionary. */ __rec_dictionary_reset(r); - /* Set the number of entries and size for the just finished chunk. */ - last = &r->bnd[r->bnd_next]; + /* Set the number of entries for the just finished chunk. 
*/ last->max_bnd_entries = r->entries; - last->size = (uint32_t)inuse; /* * In case of bulk load, write out chunks as we get them. Otherwise we @@ -2581,22 +2616,19 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) dsk->recno = last->max_bnd_recno; dsk->u.entries = last->max_bnd_entries; dsk->mem_size = (uint32_t)inuse; - r->cur_img_ptr->size = dsk->mem_size; - WT_RET(__rec_split_write(session, - r, last, r->cur_img_ptr, false)); - } else { - WT_RET(__rec_split_write_prev_and_swap_buf(session, r)); - /* current image we are writing to has changed */ - dsk = r->cur_img_ptr->mem; - } - - /* Fix where free points */ - r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); + r->disk_image.size = dsk->mem_size; + WT_RET(__rec_split_write( + session, r, last, &r->disk_image, false)); + /* Fix where free points */ + r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); + } else if (r->bnd_next != 0) + WT_RET(__rec_split_write_prev_and_shift_cur(session, r, false)); /* Prepare the next boundary */ WT_RET(__rec_split_bnd_grow(session, r)); r->bnd_next++; next = &r->bnd[r->bnd_next]; + next->offset = WT_PTRDIFF(r->first_free, dsk); /* Set the key for the next chunk. 
*/ next->max_bnd_recno = r->recno; if (dsk->type == WT_PAGE_ROW_INT || dsk->type == WT_PAGE_ROW_LEAF) @@ -2655,8 +2687,9 @@ __rec_split_crossing_bnd( !WT_CROSSING_SPLIT_BND(r, next_len)) { btree = S2BT(session); bnd = &r->bnd[r->bnd_next]; - dsk = r->cur_img_ptr->mem; - min_bnd_offset = WT_PTRDIFF(r->first_free, dsk); + dsk = r->disk_image.mem; + min_bnd_offset = (WT_PTRDIFF(r->first_free, dsk) - + bnd->offset) + WT_PAGE_HEADER_BYTE_SIZE(btree); if (min_bnd_offset == WT_PAGE_HEADER_BYTE_SIZE(btree)) /* * This is possible if the first record doesn't fit in @@ -2717,7 +2750,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, unpack = &_unpack; compressor = btree->compressor; dst = &r->raw_destination; - dsk = r->cur_img_ptr->mem; + dsk = r->disk_image.mem; WT_RET(__rec_split_bnd_grow(session, r)); last = &r->bnd[r->bnd_next]; @@ -3033,7 +3066,7 @@ no_slots: r->first_free = dsk_start + len; r->space_avail += r->raw_offsets[result_slots]; WT_ASSERT(session, r->first_free + r->space_avail <= - (uint8_t *)r->cur_img_ptr->mem + r->cur_img_ptr->memsize); + (uint8_t *)r->disk_image.mem + r->disk_image.memsize); /* * Set the key for the next block (before writing the block, a @@ -3072,13 +3105,13 @@ no_slots: dsk->recno = last->max_bnd_recno; dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); dsk->u.entries = r->entries; - r->cur_img_ptr->size = dsk->mem_size; + r->disk_image.size = dsk->mem_size; r->entries = 0; r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); r->space_avail = r->page_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - write_ref = r->cur_img_ptr; + write_ref = &r->disk_image; last->already_compressed = false; } else { /* @@ -3106,7 +3139,7 @@ no_slots: last_block && __rec_is_checkpoint(session, r, last)) { if (write_ref == dst) WT_RET(__wt_buf_set( - session, r->cur_img_ptr, dst->mem, dst->size)); + session, &r->disk_image, dst->mem, dst->size)); } else WT_RET( __rec_split_write(session, r, last, write_ref, last_block)); @@ -3139,111 +3172,6 @@ 
__rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) return (__rec_split_raw_worker(session, r, next_len, false)); } -/* - * __rec_split_finish_process_prev -- - * If the two split chunks together fit in a single page, merge them into - * one. If they do not fit in a single page but the last is smaller than - * the minimum desired, move some data from the penultimate chunk to the - * last chunk and write out the previous/penultimate. Finally, update the - * pointer to the current image buffer. After this function exits, we will - * have one (last) buffer in memory, pointed to by the current image - * pointer. - */ -static int -__rec_split_finish_process_prev( - WT_SESSION_IMPL *session, WT_RECONCILE *r, bool *chunks_merged) -{ - WT_BOUNDARY *bnd_cur, *bnd_prev; - WT_BTREE *btree; - WT_PAGE_HEADER *dsk; - size_t len_to_move; - uint32_t combined_size; - uint8_t *cur_dsk_start; - - WT_ASSERT(session, r->prev_img_ptr != NULL); - - btree = S2BT(session); - bnd_cur = &r->bnd[r->bnd_next]; - bnd_prev = bnd_cur - 1; - *chunks_merged = false; - /* - * The sizes referred to in the boundary structure include the header, - * so when calculating the combined size, make sure not to include the - * header twice. - */ - combined_size = bnd_prev->size + - (bnd_cur->size - WT_PAGE_HEADER_BYTE_SIZE(btree)); - - if (combined_size <= r->page_size) { - /* - * We have two boundaries, but the data in the buffers can fit a - * single page. Merge the boundaries and create a single chunk. 
- */ - dsk = r->cur_img_ptr->mem; - memcpy((uint8_t *)r->prev_img_ptr->mem + bnd_prev->size, - WT_PAGE_HEADER_BYTE(btree, dsk), - bnd_cur->size - WT_PAGE_HEADER_BYTE_SIZE(btree)); - bnd_prev->size = combined_size; - bnd_prev->max_bnd_entries += bnd_cur->max_bnd_entries; - r->bnd_next--; - *chunks_merged = true; - } else { - if (bnd_cur->size < r->min_split_size && - bnd_prev->min_bnd_offset != 0 ) { - /* - * The last chunk, pointed to by the current image - * pointer, has less than the minimum data. Let's move - * any data more than the minimum from the previous - * image into the current. - */ - len_to_move = bnd_prev->size - bnd_prev->min_bnd_offset; - /* Grow current buffer if it is not large enough */ - if (r->space_avail < len_to_move) - WT_RET(__rec_split_grow(session, - r, len_to_move)); - cur_dsk_start = WT_PAGE_HEADER_BYTE(btree, - r->cur_img_ptr->mem); - - /* - * Shift the contents of the current buffer to make - * space for the data that will be prepended into the - * current buffer - */ - memmove(cur_dsk_start + len_to_move, - cur_dsk_start, bnd_cur->size - - WT_PAGE_HEADER_BYTE_SIZE(btree)); - /* - * copy any data more than the minimum, from the - * previous buffer to the start of the current. - */ - memcpy(cur_dsk_start, (uint8_t *)r->prev_img_ptr->mem + - bnd_prev->min_bnd_offset, len_to_move); - - /* Update boundary information */ - bnd_cur->size += len_to_move; - bnd_prev->size -= len_to_move; - bnd_cur->max_bnd_entries += bnd_prev->max_bnd_entries - - bnd_prev->min_bnd_entries; - bnd_prev->max_bnd_entries = bnd_prev->min_bnd_entries; - bnd_cur->max_bnd_recno = bnd_prev->min_bnd_recno; - WT_RET(__wt_buf_set(session, - &bnd_cur->max_bnd_key, bnd_prev->min_bnd_key.data, - bnd_prev->min_bnd_key.size)); - } - - /* Write out the previous image */ - WT_RET(__rec_split_write_prev_and_swap_buf(session, r)); - } - - /* - * At this point, there is only one disk image in the memory, pointed to - * by the previous image pointer. 
Update the current image pointer to - * this image. - */ - r->cur_img_ptr = r->prev_img_ptr; - return (0); -} - /* * __rec_split_finish_std -- * Finish processing a page, standard version. @@ -3251,9 +3179,9 @@ __rec_split_finish_process_prev( static int __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) { - WT_BOUNDARY *bnd_cur; + WT_BOUNDARY *bnd_cur, *bnd_prev; WT_PAGE_HEADER *dsk; - bool chunks_merged; + bool grow_bnd; /* * We may arrive here with no entries to write if the page was entirely @@ -3280,22 +3208,50 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) return (EBUSY); } - /* Set the number of entries and size for the just finished chunk. */ + dsk = r->disk_image.mem; + + /* Set the number of entries for the just finished chunk. */ bnd_cur = &r->bnd[r->bnd_next]; bnd_cur->max_bnd_entries = r->entries; - bnd_cur->size = WT_PTRDIFF32(r->first_free, r->cur_img_ptr->mem); - chunks_merged = false; - if (r->prev_img_ptr != NULL) - WT_RET(__rec_split_finish_process_prev(session, - r, &chunks_merged)); + grow_bnd = true; + /* + * We can reach here even with raw_compression when the last split chunk + * is too small to be sent for raw compression. + */ + if (!r->is_bulk_load && !r->raw_compression) { + if (WT_PTRDIFF(r->first_free, dsk) > r->page_size && + r->bnd_next != 0) { + /* + * We hold two boundaries worth of data in the buffer, + * and this data doesn't fit in a single page. If the + * last chunk is too small, readjust the boundary to a + * pre-computed minimum. + * Write out the penultimate chunk to the disk as a page + */ + WT_RET(__rec_split_write_prev_and_shift_cur( + session, r, true)); + } else + if (r->bnd_next != 0) { + /* + * We have two boundaries, but the data in the + * buffer can fit a single page. Merge the + * boundaries to create a single chunk. 
+ */ + bnd_prev = bnd_cur - 1; + bnd_prev->max_bnd_entries += + bnd_cur->max_bnd_entries; + r->bnd_next--; + grow_bnd = false; + } + } /* * We already have space for an extra boundary if we merged two * boundaries above, in that case we do not need to grow the boundary * structure. */ - if (!chunks_merged) + if (grow_bnd) WT_RET(__rec_split_bnd_grow(session, r)); bnd_cur = &r->bnd[r->bnd_next]; r->bnd_next++; @@ -3304,15 +3260,14 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) * Current boundary now has all the remaining data/last page now. * Let's write it to the disk */ - dsk = r->cur_img_ptr->mem; dsk->recno = bnd_cur->max_bnd_recno; dsk->u.entries = bnd_cur->max_bnd_entries; - dsk->mem_size = bnd_cur->size; - r->cur_img_ptr->size = dsk->mem_size; + dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); + r->disk_image.size = dsk->mem_size; /* If this is a checkpoint, we're done, otherwise write the page. */ return (__rec_is_checkpoint(session, r, bnd_cur) ? - 0 : __rec_split_write(session, r, bnd_cur, r->cur_img_ptr, true)); + 0 : __rec_split_write(session, r, bnd_cur, &r->disk_image, true)); } /* @@ -3334,7 +3289,7 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) if (r->raw_compression && r->entries != 0) { while (r->entries != 0) { data_size = - WT_PTRDIFF(r->first_free, r->cur_img_ptr->mem); + WT_PTRDIFF(r->first_free, r->disk_image.mem); if (data_size <= btree->allocsize) break; WT_RET(__rec_split_raw_worker(session, r, 0, true)); @@ -5927,7 +5882,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) * write the buffer so we know what to do here. 
*/ if (bnd->addr.addr == NULL) - WT_RET(__wt_bt_write(session, r->cur_img_ptr, + WT_RET(__wt_bt_write(session, &r->disk_image, NULL, NULL, true, F_ISSET(r, WT_CHECKPOINTING), bnd->already_compressed)); else { @@ -6591,7 +6546,7 @@ __rec_dictionary_lookup( for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash); dp != NULL && dp->hash == hash; dp = dp->next[0]) { WT_RET(__wt_cell_pack_data_match( - (WT_CELL *)((uint8_t *)r->cur_img_ptr->mem + dp->offset), + (WT_CELL *)((uint8_t *)r->disk_image.mem + dp->offset), &val->cell, val->buf.data, &match)); if (match) { WT_STAT_DATA_INCR(session, rec_dictionary); -- cgit v1.2.1 From a85c66dba541466d223e79fff738c8aca3365a3b Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Thu, 22 Jun 2017 18:07:48 -0400 Subject: WT-3379 Avoid a performance regression on secondaries. (#3471) * Continue with update/restore eviction unless we are going to fall back to using the lookaside table. * Add detailed comments explaining the tests we use for switching to the lookaside table. --- src/evict/evict_page.c | 28 ++++++++++------- src/reconcile/rec_write.c | 77 +++++++++++++++++++++++++++++++++++------------ 2 files changed, 75 insertions(+), 30 deletions(-) diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index 01818f106fc..d50326afb1e 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -420,7 +420,7 @@ __evict_review( WT_DECL_RET; WT_PAGE *page; uint32_t flags; - bool lookaside_retry, modified; + bool lookaside_retry, *lookaside_retryp, modified; conn = S2C(session); flags = WT_EVICTING; @@ -541,6 +541,9 @@ __evict_review( * the page and keep it in memory. */ cache = conn->cache; + lookaside_retry = false; + lookaside_retryp = NULL; + if (closing) LF_SET(WT_VISIBILITY_ERR); else if (!WT_PAGE_IS_INTERNAL(page)) { @@ -552,23 +555,26 @@ __evict_review( if (F_ISSET(cache, WT_CACHE_EVICT_SCRUB)) LF_SET(WT_EVICT_SCRUB); + + /* + * Check if reconciliation suggests trying the + * lookaside table. 
+ */ + lookaside_retryp = &lookaside_retry; } } /* Reconcile the page. */ - ret = __wt_reconcile(session, ref, NULL, flags, &lookaside_retry); + ret = __wt_reconcile(session, ref, NULL, flags, lookaside_retryp); /* - * If reconciliation fails, eviction is stuck and reconciliation reports - * it might succeed if we use the lookaside table (the page didn't have - * uncommitted updates, it was not-yet-globally visible updates causing - * the problem), configure reconciliation to write those updates to the - * lookaside table, allowing the eviction of pages we'd otherwise have - * to retain in cache to support older readers. + * If reconciliation fails, eviction is stuck and reconciliation + * reports it might succeed if we use the lookaside table, then + * configure reconciliation to write those updates to the lookaside + * table, allowing the eviction of pages we'd otherwise have to retain + * in cache to support older readers. */ - if (ret == EBUSY && - !F_ISSET(conn, WT_CONN_IN_MEMORY) && - __wt_cache_stuck(session) && lookaside_retry) { + if (ret == EBUSY && lookaside_retry && __wt_cache_stuck(session)) { LF_CLR(WT_EVICT_SCRUB | WT_EVICT_UPDATE_RESTORE); LF_SET(WT_EVICT_LOOKASIDE); ret = __wt_reconcile(session, ref, NULL, flags, NULL); diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index f7df73c4ecb..fff6d963391 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -338,7 +338,8 @@ static int __rec_split_write(WT_SESSION_IMPL *, WT_RECONCILE *, WT_BOUNDARY *, WT_ITEM *, bool); static int __rec_update_las( WT_SESSION_IMPL *, WT_RECONCILE *, uint32_t, WT_BOUNDARY *); -static int __rec_write_check_complete(WT_SESSION_IMPL *, WT_RECONCILE *); +static int __rec_write_check_complete( + WT_SESSION_IMPL *, WT_RECONCILE *, bool *); static int __rec_write_init(WT_SESSION_IMPL *, WT_REF *, uint32_t, WT_SALVAGE_COOKIE *, void *); static void __rec_write_page_status(WT_SESSION_IMPL *, WT_RECONCILE *); @@ -438,7 +439,7 @@ 
__wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, /* Checks for a successful reconciliation. */ if (ret == 0) - ret = __rec_write_check_complete(session, r); + ret = __rec_write_check_complete(session, r, lookaside_retryp); /* Wrap up the page reconciliation. */ if (ret == 0 && (ret = __rec_write_wrapup(session, r, page)) == 0) @@ -449,14 +450,6 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, /* Release the reconciliation lock. */ WT_PAGE_UNLOCK(session, page); - /* - * If our caller can configure lookaside table reconciliation, flag if - * that's worth trying. The lookaside table doesn't help if we skipped - * updates, it can only help with older readers preventing eviction. - */ - if (lookaside_retryp != NULL && r->update_mem_uncommitted == 0) - *lookaside_retryp = true; - /* Update statistics. */ WT_STAT_CONN_INCR(session, rec_pages); WT_STAT_DATA_INCR(session, rec_pages); @@ -561,7 +554,8 @@ __rec_las_checkpoint_test(WT_SESSION_IMPL *session, WT_RECONCILE *r) * Check that reconciliation should complete. */ static int -__rec_write_check_complete(WT_SESSION_IMPL *session, WT_RECONCILE *r) +__rec_write_check_complete( + WT_SESSION_IMPL *session, WT_RECONCILE *r, bool *lookaside_retryp) { /* * Tests in this function are lookaside tests and tests to decide if @@ -581,17 +575,62 @@ __rec_write_check_complete(WT_SESSION_IMPL *session, WT_RECONCILE *r) return (EBUSY); /* - * If when doing update/restore based eviction, we didn't split and - * didn't apply any updates, then give up. + * Eviction can configure lookaside table reconciliation, consider if + * it's worth giving up this reconciliation attempt and falling back to + * using the lookaside table. We continue with evict/restore if + * switching to the lookaside doesn't make sense for any reason: we + * won't retry an evict/restore reconciliation until/unless the + * transactional system moves forward, so at worst it's a single wasted + * effort. 
* - * This may lead to saving the page to the lookaside table: that - * decision is made by eviction. + * First, check if the lookaside table is a possible alternative. */ - if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && r->bnd_next == 1 && - r->update_mem_all != 0 && r->update_mem_all == r->update_mem_saved) - return (EBUSY); + if (lookaside_retryp == NULL) + return (0); - return (0); + /* + * We only suggest lookaside if currently in an evict/restore attempt + * and some updates were saved. Our caller sets the evict/restore flag + * based on various conditions (like if this is a leaf page), which is + * why we're testing that flag instead of a set of other conditions. + * If no updates were saved, eviction will succeed without needing to + * restore anything. + */ + if (!F_ISSET(r, WT_EVICT_UPDATE_RESTORE) || r->bnd->supd == NULL) + return (0); + + /* + * Check if this reconciliation attempt is making progress. If there's + * any sign of progress, don't fall back to the lookaside table. + * + * Check if the current reconciliation split, in which case we'll + * likely get to write at least one of the blocks. If that page is + * empty, that's also progress. + */ + if (r->bnd_next != 1) + return (0); + + /* + * Check if the current reconciliation applied some updates, in which + * case evict/restore should gain us some space. + */ + if (r->update_mem_saved != r->update_mem_all) + return (0); + + /* + * Check if lookaside eviction is possible. If any of the updates we + * saw were uncommitted, the lookaside table cannot be used: it only + * helps with older readers preventing eviction. + */ + if (r->update_mem_uncommitted != 0) + return (0); + + /* + * The current evict/restore approach shows no signs of being useful, + * lookaside is possible, suggest the lookaside table. 
+ */ + *lookaside_retryp = true; + return (EBUSY); } /* -- cgit v1.2.1 From 20679618e5f84863ef6baadc230be0b2d858f09c Mon Sep 17 00:00:00 2001 From: Sulabh Mahajan Date: Fri, 23 Jun 2017 08:46:57 +1000 Subject: WT-3251 Remove interim buffer used to split pages during reconciliation (#3469) --- src/reconcile/rec_write.c | 407 +++++++++++++++++++++++++--------------------- 1 file changed, 226 insertions(+), 181 deletions(-) diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index fff6d963391..1c266496ec8 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -25,12 +25,25 @@ typedef struct { WT_PAGE *page; uint32_t flags; /* Caller's configuration */ - WT_ITEM disk_image; /* Temporary disk-image buffer */ /* - * Temporary buffer used to write out a disk image when managing two - * chunks worth of data in memory - */ - WT_ITEM *interim_buf; + * Reconciliation can end up requiring two temporary disk image buffers + * if a page split is involved. These two disk images are pointed to by + * current and the previous image pointers. During initialization the + * first image is allocated and pointed to by the current image pointer. + * If and when a split is involved the second image gets allocated and + * is pointed to by the current image pointer. The previous image + * pointer is made to refer the first image at this point. Two images + * are kept in memory to redistribute data among them in case the last + * split chunk ends up being smaller than the minimum required. As + * reconciliation generates more split chunks, the image referred to by + * the previous image pointer is written to the disk, the current and + * the previous image pointers are swapped, making space for another + * split chunk to be reconciled in the buffer that was just written out + * to the disk. 
+ */ + WT_ITEM disk_image[2]; /* Temporary disk-image buffers */ + WT_ITEM *cur_img_ptr; + WT_ITEM *prev_img_ptr; /* * Track start/stop write generation to decide if all changes to the @@ -146,17 +159,6 @@ typedef struct { * that references all of our split pages. */ struct __rec_boundary { - /* - * Offset is the byte offset in the initial split buffer of the - * first byte of the split chunk, recorded before we decide to - * split the page; the difference between chunk[1]'s offset and - * chunk[0]'s offset is chunk[0]'s length. - * - * Once we split a page, we stop filling in offset values, we're - * writing the split chunks as we find them. - */ - size_t offset; /* Split's first byte */ - WT_ADDR addr; /* Split's written location */ uint32_t size; /* Split's size */ uint32_t checksum; /* Split's checksum */ @@ -871,7 +873,8 @@ __rec_write_init(WT_SESSION_IMPL *session, r->last = &r->_last; /* Disk buffers need to be aligned for writing. */ - F_SET(&r->disk_image, WT_ITEM_ALIGNED); + F_SET(&r->disk_image[0], WT_ITEM_ALIGNED); + F_SET(&r->disk_image[1], WT_ITEM_ALIGNED); } /* Reconciliation is not re-entrant, make sure that doesn't happen. */ @@ -1016,8 +1019,8 @@ __rec_destroy(WT_SESSION_IMPL *session, void *reconcilep) return; *(WT_RECONCILE **)reconcilep = NULL; - __wt_buf_free(session, &r->disk_image); - __wt_scr_free(session, &r->interim_buf); + __wt_buf_free(session, &r->disk_image[0]); + __wt_buf_free(session, &r->disk_image[1]); __wt_free(session, r->raw_entries); __wt_free(session, r->raw_offsets); @@ -1805,7 +1808,7 @@ __rec_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size) */ WT_ASSERT(session, r->space_avail >= size); WT_ASSERT(session, WT_BLOCK_FITS( - r->first_free, size, r->disk_image.mem, r->disk_image.memsize)); + r->first_free, size, r->cur_img_ptr->mem, r->cur_img_ptr->memsize)); r->entries += v; r->space_avail -= size; @@ -1892,7 +1895,7 @@ __rec_dict_replace( * copy cell instead. 
*/ if (dp->offset == 0) - dp->offset = WT_PTRDIFF32(r->first_free, r->disk_image.mem); + dp->offset = WT_PTRDIFF32(r->first_free, r->cur_img_ptr->mem); else { /* * The offset is the byte offset from this cell to the previous, @@ -1900,7 +1903,7 @@ __rec_dict_replace( * page. */ offset = (uint64_t)WT_PTRDIFF(r->first_free, - (uint8_t *)r->disk_image.mem + dp->offset); + (uint8_t *)r->cur_img_ptr->mem + dp->offset); val->len = val->cell_len = __wt_cell_pack_copy(&val->cell, rle, offset); val->buf.data = NULL; @@ -2036,7 +2039,6 @@ __rec_leaf_page_max(WT_SESSION_IMPL *session, WT_RECONCILE *r) static void __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd) { - bnd->offset = 0; bnd->max_bnd_recno = WT_RECNO_OOB; bnd->max_bnd_entries = 0; @@ -2249,15 +2251,14 @@ __rec_split_init(WT_SESSION_IMPL *session, * Ensure the disk image buffer is large enough for the max object, as * corrected by the underlying block manager. * - * The buffer that we build disk image in, needs to hold two chunks - * worth of data. Since we want to support split_size more than the page - * size (to allow for adjustments based on the compression), this buffer - * should be greater of twice of split_size and page_size. + * Since we want to support split_size more than the page size (to allow + * for adjustments based on the compression), this buffer should be + * greater of split_size and page_size. 
*/ corrected_page_size = r->page_size; - disk_img_buf_size = 2 * WT_MAX(corrected_page_size, r->split_size); WT_RET(bm->write_size(bm, session, &corrected_page_size)); - WT_RET(__wt_buf_init(session, &r->disk_image, disk_img_buf_size)); + disk_img_buf_size = WT_MAX(corrected_page_size, r->split_size); + WT_RET(__wt_buf_init(session, &r->disk_image[0], disk_img_buf_size)); /* * Clear the disk page header to ensure all of it is initialized, even @@ -2267,15 +2268,17 @@ __rec_split_init(WT_SESSION_IMPL *session, * fixed-length column-store sets bits in bytes, where the bytes are * assumed to initially be 0. */ - memset(r->disk_image.mem, 0, page->type == WT_PAGE_COL_FIX ? + memset(r->disk_image[0].mem, 0, page->type == WT_PAGE_COL_FIX ? disk_img_buf_size : WT_PAGE_HEADER_SIZE); /* * Set the page type (the type doesn't change, and setting it later * would require additional code in a few different places). */ - dsk = r->disk_image.mem; + dsk = r->disk_image[0].mem; dsk->type = page->type; + r->cur_img_ptr = &r->disk_image[0]; + r->prev_img_ptr = NULL; r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); @@ -2284,7 +2287,6 @@ __rec_split_init(WT_SESSION_IMPL *session, WT_RET(__rec_split_bnd_grow(session, r)); __rec_split_bnd_init(session, &r->bnd[0]); r->bnd[0].max_bnd_recno = recno; - r->bnd[0].offset = WT_PAGE_HEADER_BYTE_SIZE(btree); /* Initialize the entry counter. 
*/ r->entries = 0; @@ -2490,21 +2492,18 @@ __rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len) { WT_BM *bm; WT_BTREE *btree; - size_t corrected_page_size, inuse, len; + size_t corrected_page_size, inuse; btree = S2BT(session); bm = btree->bm; - len = WT_PTRDIFF(r->first_free, r->disk_image.mem); - inuse = (len - r->bnd[r->bnd_next].offset) + - WT_PAGE_HEADER_BYTE_SIZE(btree); + inuse = WT_PTRDIFF(r->first_free, r->cur_img_ptr->mem); corrected_page_size = inuse + add_len; WT_RET(bm->write_size(bm, session, &corrected_page_size)); - /* Need to account for buffer carrying two chunks worth of data */ - WT_RET(__wt_buf_grow(session, &r->disk_image, 2 * corrected_page_size)); + WT_RET(__wt_buf_grow(session, r->cur_img_ptr, corrected_page_size)); - r->first_free = (uint8_t *)r->disk_image.mem + len; + r->first_free = (uint8_t *)r->cur_img_ptr->mem + inuse; WT_ASSERT(session, corrected_page_size >= inuse); r->space_avail = corrected_page_size - inuse; WT_ASSERT(session, r->space_avail >= add_len); @@ -2513,89 +2512,55 @@ __rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len) } /* - * __rec_split_write_prev_and_shift_cur -- - * Write the previous split chunk to the disk as a page. Shift the contents - * of the current chunk to the start of the buffer, making space for a new - * chunk to be written. - * If the caller asks for a chunk resizing, the boundary between the two - * chunks is readjusted to the minimum split size boundary details stored - * in the previous chunk, letting the current chunk grow at the cost of the - * previous chunk. + * __rec_split_write_prev_and_swap_buf -- + * If there is a previous split chunk held in the memory, write it to the + * disk as a page. If there isn't one, this is the first time we are + * splitting and need to initialize a second buffer. Also, swap the + * previous and the current buffer pointers. 
*/ static int -__rec_split_write_prev_and_shift_cur( - WT_SESSION_IMPL *session, WT_RECONCILE *r, bool resize_chunks) +__rec_split_write_prev_and_swap_buf(WT_SESSION_IMPL *session, WT_RECONCILE *r) { - WT_BM *bm; - WT_BOUNDARY *bnd_cur, *bnd_prev; - WT_BTREE *btree; - WT_PAGE_HEADER *dsk, *dsk_tmp; - size_t cur_len, len; - uint8_t *dsk_start; - - WT_ASSERT(session, r->bnd_next != 0); - - btree = S2BT(session); - bm = btree->bm; - bnd_cur = &r->bnd[r->bnd_next]; - bnd_prev = bnd_cur - 1; - dsk = r->disk_image.mem; - cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset; - - /* - * Resize chunks if the current is smaller than the minimum, and there - * are details on the minimum split size boundary available in the - * previous boundary details. - * - * There is a possibility that we do not have a minimum boundary set, in - * such a case we skip chunk resizing. Such a condition is possible for - * instance when we are building the image in the buffer and the first - * K/V pair is large enough that it surpasses both the minimum split - * size and the split size the application has set. In such a case we - * split the chunk without saving any minimum boundary. 
- */ - if (resize_chunks && - cur_len < r->min_split_size && bnd_prev->min_bnd_offset != 0) { - bnd_cur->offset = bnd_prev->min_bnd_offset; - bnd_cur->max_bnd_entries += - bnd_prev->max_bnd_entries - bnd_prev->min_bnd_entries; - bnd_prev->max_bnd_entries = bnd_prev->min_bnd_entries; - bnd_cur->max_bnd_recno = bnd_prev->min_bnd_recno; - - WT_RET(__wt_buf_set(session, &bnd_cur->max_bnd_key, - bnd_prev->min_bnd_key.data, bnd_prev->min_bnd_key.size)); - - /* Update current chunk's length */ - cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset; + WT_BOUNDARY *bnd_prev; + WT_ITEM *tmp_img_ptr; + WT_PAGE_HEADER *dsk; + size_t disk_img_size; + + WT_ASSERT(session, (r->prev_img_ptr == NULL && r->bnd_next == 0) || + (r->prev_img_ptr != NULL && r->bnd_next != 0)); + + /* Write previous chunk, if there is one */ + if (r->prev_img_ptr != NULL) { + bnd_prev = &r->bnd[r->bnd_next - 1]; + dsk = r->prev_img_ptr->mem; + dsk->recno = bnd_prev->max_bnd_recno; + dsk->u.entries = bnd_prev->max_bnd_entries; + dsk->mem_size = (uint32_t)bnd_prev->size; + r->prev_img_ptr->size = dsk->mem_size; + WT_RET(__rec_split_write(session, + r, bnd_prev, r->prev_img_ptr, false)); + } else { + /* + * If we do not have a previous buffer, we should initialize the + * second buffer before proceeding. We will create the second + * buffer of the same size as the current buffer. + */ + disk_img_size = r->cur_img_ptr->memsize; + WT_RET(__wt_buf_init(session, + &r->disk_image[1], disk_img_size)); + r->prev_img_ptr = &r->disk_image[1]; + dsk = r->prev_img_ptr->mem; + memset(dsk, 0, + r->page->type == WT_PAGE_COL_FIX ? + disk_img_size : WT_PAGE_HEADER_SIZE); + dsk->type = r->page->type; } - /* - * Create an interim buffer if not already done to prepare the previous - * chunk's disk image. 
- */ - len = bnd_cur->offset; - WT_RET(bm->write_size(bm, session, &len)); - if (r->interim_buf == NULL) - WT_RET(__wt_scr_alloc(session, len, &r->interim_buf)); - else - WT_RET(__wt_buf_init(session, r->interim_buf, len)); - - dsk_tmp = r->interim_buf->mem; - memcpy(dsk_tmp, dsk, bnd_cur->offset); - dsk_tmp->recno = bnd_prev->max_bnd_recno; - dsk_tmp->u.entries = bnd_prev->max_bnd_entries; - dsk_tmp->mem_size = WT_STORE_SIZE(bnd_cur->offset); - r->interim_buf->size = dsk_tmp->mem_size; - WT_RET(__rec_split_write(session, r, bnd_prev, r->interim_buf, false)); - - /* Shift the current chunk to the start of the buffer */ - dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk); - (void)memmove(dsk_start, (uint8_t *)dsk + bnd_cur->offset, cur_len); - - /* Fix boundary offset */ - bnd_cur->offset = WT_PAGE_HEADER_BYTE_SIZE(btree); - /* Fix where free points */ - r->first_free = dsk_start + cur_len; + /* swap previous and current buffers */ + tmp_img_ptr = r->prev_img_ptr; + r->prev_img_ptr = r->cur_img_ptr; + r->cur_img_ptr = tmp_img_ptr; + return (0); } @@ -2613,7 +2578,7 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) size_t inuse; btree = S2BT(session); - dsk = r->disk_image.mem; + dsk = r->cur_img_ptr->mem; /* Fixed length col store can call with next_len 0 */ WT_ASSERT(session, next_len == 0 || r->space_avail < next_len); @@ -2627,9 +2592,7 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) "%s page too large, attempted split during salvage", __wt_page_type_string(r->page->type)); - last = &r->bnd[r->bnd_next]; - inuse = (WT_PTRDIFF(r->first_free, dsk) - last->offset) + - WT_PAGE_HEADER_BYTE_SIZE(btree); + inuse = WT_PTRDIFF(r->first_free, dsk); /* * We can get here if the first key/value pair won't fit. @@ -2642,8 +2605,10 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) /* All page boundaries reset the dictionary. 
*/ __rec_dictionary_reset(r); - /* Set the number of entries for the just finished chunk. */ + /* Set the number of entries and size for the just finished chunk. */ + last = &r->bnd[r->bnd_next]; last->max_bnd_entries = r->entries; + last->size = (uint32_t)inuse; /* * In case of bulk load, write out chunks as we get them. Otherwise we @@ -2655,19 +2620,22 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) dsk->recno = last->max_bnd_recno; dsk->u.entries = last->max_bnd_entries; dsk->mem_size = (uint32_t)inuse; - r->disk_image.size = dsk->mem_size; - WT_RET(__rec_split_write( - session, r, last, &r->disk_image, false)); - /* Fix where free points */ - r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); - } else if (r->bnd_next != 0) - WT_RET(__rec_split_write_prev_and_shift_cur(session, r, false)); + r->cur_img_ptr->size = dsk->mem_size; + WT_RET(__rec_split_write(session, + r, last, r->cur_img_ptr, false)); + } else { + WT_RET(__rec_split_write_prev_and_swap_buf(session, r)); + /* current image we are writing to has changed */ + dsk = r->cur_img_ptr->mem; + } + + /* Fix where free points */ + r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); /* Prepare the next boundary */ WT_RET(__rec_split_bnd_grow(session, r)); r->bnd_next++; next = &r->bnd[r->bnd_next]; - next->offset = WT_PTRDIFF(r->first_free, dsk); /* Set the key for the next chunk. 
*/ next->max_bnd_recno = r->recno; if (dsk->type == WT_PAGE_ROW_INT || dsk->type == WT_PAGE_ROW_LEAF) @@ -2726,9 +2694,8 @@ __rec_split_crossing_bnd( !WT_CROSSING_SPLIT_BND(r, next_len)) { btree = S2BT(session); bnd = &r->bnd[r->bnd_next]; - dsk = r->disk_image.mem; - min_bnd_offset = (WT_PTRDIFF(r->first_free, dsk) - - bnd->offset) + WT_PAGE_HEADER_BYTE_SIZE(btree); + dsk = r->cur_img_ptr->mem; + min_bnd_offset = WT_PTRDIFF(r->first_free, dsk); if (min_bnd_offset == WT_PAGE_HEADER_BYTE_SIZE(btree)) /* * This is possible if the first record doesn't fit in @@ -2789,7 +2756,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session, unpack = &_unpack; compressor = btree->compressor; dst = &r->raw_destination; - dsk = r->disk_image.mem; + dsk = r->cur_img_ptr->mem; WT_RET(__rec_split_bnd_grow(session, r)); last = &r->bnd[r->bnd_next]; @@ -3105,7 +3072,7 @@ no_slots: r->first_free = dsk_start + len; r->space_avail += r->raw_offsets[result_slots]; WT_ASSERT(session, r->first_free + r->space_avail <= - (uint8_t *)r->disk_image.mem + r->disk_image.memsize); + (uint8_t *)r->cur_img_ptr->mem + r->cur_img_ptr->memsize); /* * Set the key for the next block (before writing the block, a @@ -3144,13 +3111,13 @@ no_slots: dsk->recno = last->max_bnd_recno; dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); dsk->u.entries = r->entries; - r->disk_image.size = dsk->mem_size; + r->cur_img_ptr->size = dsk->mem_size; r->entries = 0; r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); r->space_avail = r->page_size - WT_PAGE_HEADER_BYTE_SIZE(btree); - write_ref = &r->disk_image; + write_ref = r->cur_img_ptr; last->already_compressed = false; } else { /* @@ -3178,7 +3145,7 @@ no_slots: last_block && __rec_is_checkpoint(session, r, last)) { if (write_ref == dst) WT_RET(__wt_buf_set( - session, &r->disk_image, dst->mem, dst->size)); + session, r->cur_img_ptr, dst->mem, dst->size)); } else WT_RET( __rec_split_write(session, r, last, write_ref, last_block)); @@ -3211,6 +3178,111 @@ 
__rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) return (__rec_split_raw_worker(session, r, next_len, false)); } +/* + * __rec_split_finish_process_prev -- + * If the two split chunks together fit in a single page, merge them into + * one. If they do not fit in a single page but the last is smaller than + * the minimum desired, move some data from the penultimate chunk to the + * last chunk and write out the previous/penultimate. Finally, update the + * pointer to the current image buffer. After this function exits, we will + * have one (last) buffer in memory, pointed to by the current image + * pointer. + */ +static int +__rec_split_finish_process_prev( + WT_SESSION_IMPL *session, WT_RECONCILE *r, bool *chunks_merged) +{ + WT_BOUNDARY *bnd_cur, *bnd_prev; + WT_BTREE *btree; + WT_PAGE_HEADER *dsk; + size_t len_to_move; + uint32_t combined_size; + uint8_t *cur_dsk_start; + + WT_ASSERT(session, r->prev_img_ptr != NULL); + + btree = S2BT(session); + bnd_cur = &r->bnd[r->bnd_next]; + bnd_prev = bnd_cur - 1; + *chunks_merged = false; + /* + * The sizes referred to in the boundary structure include the header, + * so when calculating the combined size, make sure not to include the + * header twice. + */ + combined_size = bnd_prev->size + + (bnd_cur->size - WT_PAGE_HEADER_BYTE_SIZE(btree)); + + if (combined_size <= r->page_size) { + /* + * We have two boundaries, but the data in the buffers can fit a + * single page. Merge the boundaries and create a single chunk. 
+ */ + dsk = r->cur_img_ptr->mem; + memcpy((uint8_t *)r->prev_img_ptr->mem + bnd_prev->size, + WT_PAGE_HEADER_BYTE(btree, dsk), + bnd_cur->size - WT_PAGE_HEADER_BYTE_SIZE(btree)); + bnd_prev->size = combined_size; + bnd_prev->max_bnd_entries += bnd_cur->max_bnd_entries; + r->bnd_next--; + *chunks_merged = true; + } else { + if (bnd_cur->size < r->min_split_size && + bnd_prev->min_bnd_offset != 0 ) { + /* + * The last chunk, pointed to by the current image + * pointer, has less than the minimum data. Let's move + * any data more than the minimum from the previous + * image into the current. + */ + len_to_move = bnd_prev->size - bnd_prev->min_bnd_offset; + /* Grow current buffer if it is not large enough */ + if (r->space_avail < len_to_move) + WT_RET(__rec_split_grow(session, + r, len_to_move)); + cur_dsk_start = WT_PAGE_HEADER_BYTE(btree, + r->cur_img_ptr->mem); + + /* + * Shift the contents of the current buffer to make + * space for the data that will be prepended into the + * current buffer + */ + memmove(cur_dsk_start + len_to_move, + cur_dsk_start, bnd_cur->size - + WT_PAGE_HEADER_BYTE_SIZE(btree)); + /* + * copy any data more than the minimum, from the + * previous buffer to the start of the current. + */ + memcpy(cur_dsk_start, (uint8_t *)r->prev_img_ptr->mem + + bnd_prev->min_bnd_offset, len_to_move); + + /* Update boundary information */ + bnd_cur->size += (uint32_t)len_to_move; + bnd_prev->size -= (uint32_t)len_to_move; + bnd_cur->max_bnd_entries += bnd_prev->max_bnd_entries - + bnd_prev->min_bnd_entries; + bnd_prev->max_bnd_entries = bnd_prev->min_bnd_entries; + bnd_cur->max_bnd_recno = bnd_prev->min_bnd_recno; + WT_RET(__wt_buf_set(session, + &bnd_cur->max_bnd_key, bnd_prev->min_bnd_key.data, + bnd_prev->min_bnd_key.size)); + } + + /* Write out the previous image */ + WT_RET(__rec_split_write_prev_and_swap_buf(session, r)); + } + + /* + * At this point, there is only one disk image in the memory, pointed to + * by the previous image pointer. 
Update the current image pointer to + * this image. + */ + r->cur_img_ptr = r->prev_img_ptr; + return (0); +} + /* * __rec_split_finish_std -- * Finish processing a page, standard version. @@ -3218,9 +3290,9 @@ __rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) static int __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) { - WT_BOUNDARY *bnd_cur, *bnd_prev; + WT_BOUNDARY *bnd_cur; WT_PAGE_HEADER *dsk; - bool grow_bnd; + bool chunks_merged; /* * We may arrive here with no entries to write if the page was entirely @@ -3247,50 +3319,22 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) return (EBUSY); } - dsk = r->disk_image.mem; - - /* Set the number of entries for the just finished chunk. */ + /* Set the number of entries and size for the just finished chunk. */ bnd_cur = &r->bnd[r->bnd_next]; bnd_cur->max_bnd_entries = r->entries; + bnd_cur->size = WT_PTRDIFF32(r->first_free, r->cur_img_ptr->mem); - grow_bnd = true; - /* - * We can reach here even with raw_compression when the last split chunk - * is too small to be sent for raw compression. - */ - if (!r->is_bulk_load && !r->raw_compression) { - if (WT_PTRDIFF(r->first_free, dsk) > r->page_size && - r->bnd_next != 0) { - /* - * We hold two boundaries worth of data in the buffer, - * and this data doesn't fit in a single page. If the - * last chunk is too small, readjust the boundary to a - * pre-computed minimum. - * Write out the penultimate chunk to the disk as a page - */ - WT_RET(__rec_split_write_prev_and_shift_cur( - session, r, true)); - } else - if (r->bnd_next != 0) { - /* - * We have two boundaries, but the data in the - * buffer can fit a single page. Merge the - * boundaries to create a single chunk. 
- */ - bnd_prev = bnd_cur - 1; - bnd_prev->max_bnd_entries += - bnd_cur->max_bnd_entries; - r->bnd_next--; - grow_bnd = false; - } - } + chunks_merged = false; + if (r->prev_img_ptr != NULL) + WT_RET(__rec_split_finish_process_prev(session, + r, &chunks_merged)); /* * We already have space for an extra boundary if we merged two * boundaries above, in that case we do not need to grow the boundary * structure. */ - if (grow_bnd) + if (!chunks_merged) WT_RET(__rec_split_bnd_grow(session, r)); bnd_cur = &r->bnd[r->bnd_next]; r->bnd_next++; @@ -3299,14 +3343,15 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) * Current boundary now has all the remaining data/last page now. * Let's write it to the disk */ + dsk = r->cur_img_ptr->mem; dsk->recno = bnd_cur->max_bnd_recno; dsk->u.entries = bnd_cur->max_bnd_entries; - dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); - r->disk_image.size = dsk->mem_size; + dsk->mem_size = bnd_cur->size; + r->cur_img_ptr->size = dsk->mem_size; /* If this is a checkpoint, we're done, otherwise write the page. */ return (__rec_is_checkpoint(session, r, bnd_cur) ? - 0 : __rec_split_write(session, r, bnd_cur, &r->disk_image, true)); + 0 : __rec_split_write(session, r, bnd_cur, r->cur_img_ptr, true)); } /* @@ -3328,7 +3373,7 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) if (r->raw_compression && r->entries != 0) { while (r->entries != 0) { data_size = - WT_PTRDIFF(r->first_free, r->disk_image.mem); + WT_PTRDIFF(r->first_free, r->cur_img_ptr->mem); if (data_size <= btree->allocsize) break; WT_RET(__rec_split_raw_worker(session, r, 0, true)); @@ -5921,7 +5966,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) * write the buffer so we know what to do here. 
*/ if (bnd->addr.addr == NULL) - WT_RET(__wt_bt_write(session, &r->disk_image, + WT_RET(__wt_bt_write(session, r->cur_img_ptr, NULL, NULL, true, F_ISSET(r, WT_CHECKPOINTING), bnd->already_compressed)); else { @@ -6585,7 +6630,7 @@ __rec_dictionary_lookup( for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash); dp != NULL && dp->hash == hash; dp = dp->next[0]) { WT_RET(__wt_cell_pack_data_match( - (WT_CELL *)((uint8_t *)r->disk_image.mem + dp->offset), + (WT_CELL *)((uint8_t *)r->cur_img_ptr->mem + dp->offset), &val->cell, val->buf.data, &match)); if (match) { WT_STAT_DATA_INCR(session, rec_dictionary); -- cgit v1.2.1 From f59321a3726bfd0caa71b6c653f7972e9e076682 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 26 Jun 2017 03:04:00 -0400 Subject: WT-3373 Access violation due to a bug in internal page splitting (#3478) When acquiring a lock on our parent internal page, we use the WT_REF.home field to reference our parent page. As a child of the parent page, we prevent its eviction, but that's a weak guarantee. If the parent page splits, and our WT_REF were to move with the split, the WT_REF.home field might change underneath us and we could race, and end up attempting to access an evicted page. Set the session page-index generation so if the parent splits, it still can't be evicted. --- src/btree/bt_split.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 71346baee2e..c1b7b6c4001 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -1122,12 +1122,12 @@ err: switch (complete) { } /* - * __split_internal_lock -- + * __split_internal_lock_worker -- * Lock an internal page. 
*/ static int -__split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock, - WT_PAGE **parentp, bool *hazardp) +__split_internal_lock_worker(WT_SESSION_IMPL *session, + WT_REF *ref, bool trylock, WT_PAGE **parentp, bool *hazardp) { WT_DECL_RET; WT_PAGE *parent; @@ -1205,6 +1205,32 @@ err: WT_PAGE_UNLOCK(session, parent); return (ret); } +/* + * __split_internal_lock -- + * Lock an internal page. + */ +static int +__split_internal_lock(WT_SESSION_IMPL *session, + WT_REF *ref, bool trylock, WT_PAGE **parentp, bool *hazardp) +{ + WT_DECL_RET; + + /* + * There's no lock on our parent page and we're about to acquire one, + * which implies using the WT_REF.home field to reference our parent + * page. As a child of the parent page, we prevent its eviction, but + * that's a weak guarantee. If the parent page splits, and our WT_REF + * were to move with the split, the WT_REF.home field might change + * underneath us and we could race, and end up attempting to access + * an evicted page. Set the session page-index generation so if the + * parent splits, it still can't be evicted. + */ + WT_WITH_PAGE_INDEX(session, + ret = __split_internal_lock_worker( + session, ref, trylock, parentp, hazardp)); + return (ret); +} + /* * __split_internal_unlock -- * Unlock the parent page. 
-- cgit v1.2.1 From d139a5d5be1d7ba94130502b379a61a809e66272 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Tue, 27 Jun 2017 12:00:15 +1000 Subject: WT-3391 Add release notes for the WiredTiger 2.9.3 release (#3481) * Add release notes for the WiredTiger 2.9.3 release * Update doc landing page --- NEWS | 24 ++++++++++++++++++++++++ README | 2 +- build_posix/aclocal/version-set.m4 | 2 +- src/docs/top/main.dox | 8 ++++---- 4 files changed, 30 insertions(+), 6 deletions(-) diff --git a/NEWS b/NEWS index 380db269523..ffcefd5f8c1 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,30 @@ Ticket reference tags refer to tickets in the MongoDB JIRA tracking system: https://jira.mongodb.org +WiredTiger release 2.9.3, 2017-06-27 +------------------------------------ + +See the upgrading documentation for details of API and behavior changes. + +Significant changes: +* WT-2972 Add an interface allowing partial updates to existing values +* WT-3063 Add an interface allowing reservation of records for read-modify-write +* WT-3142 Add a workload generator application +* WT-3160 Improve eviction of internal pages from idle trees +* WT-3245 Avoid hangs on shutdown when a utility thread encounters an error +* WT-3258 Improve visibility into thread wait time due to pages exceeding memory_page_max +* WT-3263 Allow archive on restart/recovery if clean shutdown +* WT-3287 Review WiredTiger internal panic checks +* WT-3292 Review/cleanup full-barrier calls in WiredTiger +* WT-3296 LAS table fixes/improvements +* WT-3327 Checkpoints can hang if time runs backward +* WT-3345 Improve rwlock scaling +* WT-3373 Access violation due to a bug in internal page splitting +* WT-3379 Change when pages can be split to avoid excessively slowing some operations + +See JIRA changelog for a full listing: +https://jira.mongodb.org/browse/WT/fixforversion/18291 + WiredTiger release 2.9.2, 2017-05-25 ------------------------------------ diff --git a/README b/README index eb5324eb4d1..db51b69d91b 100644 --- a/README 
+++ b/README @@ -1,4 +1,4 @@ -WiredTiger 2.9.3: (May 27, 2017) +WiredTiger 2.9.3: (June 26, 2017) This is version 2.9.3 of WiredTiger. diff --git a/build_posix/aclocal/version-set.m4 b/build_posix/aclocal/version-set.m4 index bbf8547e548..07765503294 100644 --- a/build_posix/aclocal/version-set.m4 +++ b/build_posix/aclocal/version-set.m4 @@ -3,7 +3,7 @@ dnl build by dist/s_version VERSION_MAJOR=2 VERSION_MINOR=9 VERSION_PATCH=3 -VERSION_STRING='"WiredTiger 2.9.3: (May 27, 2017)"' +VERSION_STRING='"WiredTiger 2.9.3: (June 26, 2017)"' AC_SUBST(VERSION_MAJOR) AC_SUBST(VERSION_MINOR) diff --git a/src/docs/top/main.dox b/src/docs/top/main.dox index 6b28bd0062f..1bfb623c0a0 100644 --- a/src/docs/top/main.dox +++ b/src/docs/top/main.dox @@ -6,12 +6,12 @@ WiredTiger is an high performance, scalable, production quality, NoSQL, @section releases Releases -@row{WiredTiger 2.9.2 (current), +@row{WiredTiger 2.9.3 (current), + [Release package], + [Documentation]} +@row{WiredTiger 2.9.2 (previous), [Release package], [Documentation]} -@row{WiredTiger 2.8.0 (previous), - [Release package], - [Documentation]} @row{Development branch, [Source code], [Documentation]} -- cgit v1.2.1