diff options
author | Alex Gorrod <alexg@wiredtiger.com> | 2016-04-05 14:43:57 +1000 |
---|---|---|
committer | Alex Gorrod <alexg@wiredtiger.com> | 2016-04-05 14:44:50 +1000 |
commit | de6f136d83b20f8a58ba6fe4ba02be229b6c9159 (patch) | |
tree | 3221d66b54cbf6208fc3c995fdbb36d347ae85ff /src/third_party/wiredtiger | |
parent | 5d1262cc394d685b59ae3185d7315227085e897d (diff) | |
download | mongo-de6f136d83b20f8a58ba6fe4ba02be229b6c9159.tar.gz |
Import wiredtiger-wiredtiger-2.8.0-134-g5047aab.tar.gz from wiredtiger branch mongodb-3.4
ref: 9cf8eb2..5047aab
SERVER-23504 Coverity analysis defect 98177: Resource leak
WT-2330 in-memory configurations should not create on-disk collection files
WT-2513 conversion from 'int64_t' to 'uint32_t'
WT-2522 Incorrect format code in message
WT-2525 in-memory configurations: miscellaneous cleanups
WT-2527 OS X compile error, missing POSIX_FADV_WILLNEED #define
WT-2528 style error in WiredTiger build
WT-2529 The readonly test case is crashing with a stack overflow
WT-2531 in-memory tables are allocating unnecessary memory
WT-2532 WT_STREAM_APPEND and WT_STREAM_LINE_BUFFER flag overlap
WT-2533 Ensure that in-memory tables don't report a zero size
SERVER-23517 WiredTiger changes for MongoDB 3.3.5
Diffstat (limited to 'src/third_party/wiredtiger')
125 files changed, 4544 insertions, 3034 deletions
diff --git a/src/third_party/wiredtiger/NEWS b/src/third_party/wiredtiger/NEWS index 546d08b2418..af8b15488cc 100644 --- a/src/third_party/wiredtiger/NEWS +++ b/src/third_party/wiredtiger/NEWS @@ -1,3 +1,159 @@ +WiredTiger release 2.8.0, 2015-03-24 +------------------------------------ + +The WiredTiger 2.8.0 release contains new features, new supported platforms, +minor API changes and bug fixes. + +New features and API changes; refer to the API documentation for full details: + +* WT-60 Port WiredTiger to run on big endian platforms +* WT-2287 Add a new WT_SESSION.rebalance API +* WT-2333 Add a lock_wait configuration setting to WT_SESSION.drop to avoid blocking +* WT-2349 Add a readonly configuration setting to wiredtiger_open +* WT-2363 Remove built in support for bzip2 compression +* WT-2404 Add streaming pack/unpack methods to the extension API + +Significant changes and bug fixes: + +* WT-1801 Add a directory sync after rollback of a WT_SESSION::rename operation +* WT-2130 Improve on-disk page utilization with random workloads +* WT-2275 Fix a database corruption after truncate and crash +* WT-2264 High update workloads can cause checkpoints to never complete +* WT-2290 Improve effectiveness of WT_SESSION.compact +* WT-2361 Fix a bug in column-store where verify identifies out of order data +* WT-2367 Fix a bug in WT_CURSOR.next that could cause out-of-order key returns +* WT-2374 Fix a bug where a database was corrupted when restoring a backup +* WT-2381 Fix the dump utility to include the table configuration +* WT-2451 Allow the WiredTiger metadata to be evicted +* WT-2490 Fix a bug in column-store where search_near() returns the wrong key + +Issues fixed in MongoDB: + +* SERVER-21619 sys-perf: WT crash during core_workloads_WT execution +* SERVER-21833 Enhance WT_SESSION::compact to more reliably release space +* SERVER-21887 Enhance $sample to be faster on newly created collection +* SERVER-22676 Allow WiredTiger to open databases created by 3.0.0 or 
3.0.1 +* SERVER-22773 New CRC32 implementation on PowerPC +* SERVER-22831 Low query rate with heavy cache pressure and an idle collection + +Other noteworthy changes since the previous release: + +* WT-1517 Fix error handling around schema format edge cases +* WT-2060 Simplify aggregation of statistics +* WT-2073 Metadata cleanups +* WT-2099 Seeing memory underflow messages +* WT-2107 Add example code including an event handler +* WT-2113 Truncate test occasionally fails with unexpected EBUSY +* WT-2123 Don't clear allocated memory if not required +* WT-2173 Fix some cases where tiny caches could get stuck full +* WT-2177 Add an optional per-thread seed to random number generator +* WT-2198 Bulk load and column store appends +* WT-2215 WT_LSN needs to support atomic reads and updates +* WT-2216 Simplify row-store search loop slightly +* WT-2231 Pinned page cursor searches could check parent keys +* WT-2235 Add a unicode option to WiredTiger printlog utility +* WT-2242 WiredTiger treats dead trees the same as other trees in eviction +* WT-2246 Improve performance for column-store append searches +* WT-2247 Variable-length column-store in-memory page splits +* WT-2258 Stop WiredTiger pre-loading pages when direct-IO is configured +* WT-2259 Fix error handling when getting exclusive access to a btree +* WT-2262 Fix random cursor next so it is not skewed by tree shape +* WT-2265 WiredTiger related change in PowerPC specific code block in gcc.h +* WT-2272 Fix a bug in the sweep server that triggered an assertion +* WT-2276 Add a tool to decode checkpoint addr +* WT-2277 Remove WT check against big-endian systems +* WT-2279 Define WT_PAUSE(), WT_FULL_BARRIER(), etc when s390x is defined +* WT-2280 Add CRC32 Optimized code for PowerPC +* WT-2282 Error in wt_txn_update_oldest verbose message test +* WT-2283 Retry in txn_update_oldest results in a hang +* WT-2285 Enhance configure to set BUFFER_ALIGNMENT_DEFAULT to 4kb on Linux +* WT-2289 Fix a bug in btree search when 
doing a fast key check +* WT-2291 Random cursor walk inefficient in skip list only trees +* WT-2295 WT_SESSION.create does a full-scan of the main table +* WT-2296 Improve log algorithm for sync/flush settings +* WT-2297 Fix off-by-one error in Huffman config file parsing +* WT-2299 Clean up layering violation between btree and block manager code +* WT-2307 Fix a bug where internal page splits can corrupt cursor iteration +* WT-2308 Add support for custom extractor for ref_cursors in join cursor +* WT-2311 Add support for UltraSparc platform +* WT-2312 Fix a bug where re-creating a deleted column-store page can corrupt the in-memory tree +* WT-2313 Fix a bug in the sweep server +* WT-2314 Update page-swap error handling so that it is consistent +* WT-2316 Fix a bug in WT_CURSOR.prev where it could return keys out-of-order +* WT-2318 Enhance condition wait implementation to use less CPU on idle databases +* WT-2321 Fix a race between eviction and worker threads on the eviction queue +* WT-2322 Fix a bug in read-uncommitted join cursors where using Bloom filters is unsafe +* WT-2328 Update schema drop to use the block manager interface for file removal +* WT-2331 Checking of search result for reference cursors before join +* WT-2332 Fix a bug in logging write-no-sync mode +* WT-2335 Fix a bug where parsing an invalid configuration string could segfault +* WT-2338 Disable using pre-allocated log files when a backup cursor is open +* WT-2339 Fix a bug in rebalance that caused database verification failure +* WT-2340 Add logging guarantee assertions +* WT-2345 Avoid creating tiny pages on disk when evicting small pages from cache +* WT-2346 Enhance checkpoint implementation so the schema lock is not held during I/O +* WT-2347 Fix some schema format edge cases in Java API +* WT-2352 Allow build and test without requiring lz4 +* WT-2355 Fix minor scratch buffer usage in logging +* WT-2356 log scan advances to next log file on partially written record +* WT-2368 Fix a 
bug where row-store can pass invalid keys to collator functions +* WT-2369 Use C compiler to detect headers instead of C++ compiler +* WT-2371 Fix a bug where parent split cannot access the page after page-index swap +* WT-2372 WiredTiger windows builder fails with C4005 against the "inline" macro +* WT-2375 Add tests for custom collators +* WT-2378 Fix a hang in LSM when doing forced drop with the no wait option +* WT-2382 Fix a bug in join cursors with custom collator for 'u' format +* WT-2384 Fix a bug in join cursors where lt, le conditions for ordering could be wrong +* WT-2387 Fix cursor random unit test on Windows +* WT-2390 Fix the OS X build +* WT-2391 Enhance eviction so that it is less likely to evict pages from indexes +* WT-2394 Fix a bug in compact that meant we didn't always reclaim available space +* WT-2395 Fix a recovery failure with an LSM tree +* WT-2396 Fix a deadlock between table drop and checkpoint +* WT-2397 Fix a bug in cursor traversal where doing a reverse walk could skip records. 
+* WT-2399 Add test case that verifies cursor traversal +* WT-2409 Fix a minor performance regression in LSM +* WT-2410 Stop casting function pointers to different types +* WT-2411 Fix a hang in LSM related to dropping tables +* WT-2414 Avoid extractor calls for ordering cursor in join cursor +* WT-2417 Windows Jenkins task is failing +* WT-2418 Fix a bug in WT_SESSION.rebalance where it could return EBUSY +* WT-2420 Fix a bug in LSM where recovery from a backup could fail +* WT-2423 Fix a bug in session reference counting on error handling +* WT-2425 Fix a performance regression in wtperf evict-btree read workload +* WT-2426 Fix a deadlock caused by recent changes to checkpoint handle locking +* WT-2428 Make statistics logging compatible with MongoDB +* WT-2429 Add a statistic that tracks aggressive mode in eviction +* WT-2430 Add statistics for join cursor +* WT-2432 Fix a performance regression on LSM and read only workloads +* WT-2433 Allow read-only databases to log statistics +* WT-2434 Fix a race between force-drop and sweep +* WT-2436 Fix a bug in join cursors with lt, le conditions and "strategy=bloom" +* WT-2438 Extend WiredTiger stat declarations to help external tools +* WT-2440 Fix a bug in the PowerPC checksum implementation +* WT-2443 Add statistics for all indexes used in join cursor +* WT-2447 Enhance join cursor implementation to avoid reading main table where possible +* WT-2448 Add no_scale flag to relevant statistics +* WT-2449 Enhance configure to check for a 64-bit build +* WT-2454 Fix checkpoint_sync=false behavior to prevent flushes/sync to disk +* WT-2456 Fix PowerPC CRC32 Code +* WT-2457 Fix a bug where dropping an LSM table can return EBUSY when no user ops are active +* WT-2459 Allow configure to use the --tag option for libtool when compiling on PowerPC +* WT-2460 Fix a bug where checkpoint could fail with WT_ROLLBACK +* WT-2471 Update WiredTiger printf formats to be platform aware +* WT-2476 Fix a race where btree->evict_lock is being 
accessed after being destroyed +* WT-2481 Fix a recently introduced performance regression in LSM +* WT-2483 Make read only testing more robust +* WT-2485 Fix a test/format failure with floating point exception +* WT-2492 Fix a bug in Windows where we used the different memory allocators accidentally +* WT-2495 Missing memory initialization leads to crash on Windows +* WT-2496 Fix a bug revealed by test/format unable to read root page +* WT-2497 Enhance test/format to save a copy of backup +* WT-2498 Fix a bug in LSM tree drop where it could hang when a user cursor is open +* WT-2499 Fix a bug in LSM shutdown where a race condition causes a segfault +* WT-2501 Fix a bug where dropping a just opened LSM tree isn't thread safe +* WT-2502 Fix a memory leak in locking handles for checkpoint + WiredTiger release 2.7.0, 2015-12-08 ------------------------------------ @@ -6,222 +162,221 @@ fixes. New features and API changes; refer to the API documentation for full details: -* 959376c WT-147: Create indexes on non-empty tables. -* 4368d39 WT-1315: Add an implementation of cursor joins via a new WT_SESSION::join API. -* 944ccd1 WT-1350: Add a new configuration option to ::wiredtiger_open and - WT_CONNECTION::reconfigure called "eviction_dirty_trigger" that causes eviction to start evicting - dirty pages from cache once the given threshold has been reached. -* ab5a8fb WT-1728: Add a WT_SESSION::reset method to release resources held by a session. -* 263c5b7 WT-1930: Allow setting "file_manager=(close_idle_time=0)" to ::wiredtiger_open and - WT_CONNECTION::reconfigure to disable closing idle handles. -* 6310c3f WT-1959: Change verify to distinguish between warnings and errors. Add a new strict mode - to verify that causes warnings to be reported as errors. Use strict mode to match earlier - behavior. See the upgrading documentation for more information. 
-* e0d6229 WT-1980: Add a new "metadata:create" URI to WT_SESSION::open_cursor for metadata cursors - that return strings useful for passing to WT_SESSION::create. -* 292712e WT-2065: Add a new configuration option to ::wiredtiger_open and - WT_CONNECTION::reconfigure called "shared_cache=(quota)" that limits the amount of shared cache a - participant can be assigned. -* 4d0ebf4 WT-2104: Add a method to flush log files via a new WT_SESSION::log_flush API. Made - WT_SESSION::commit_transaction configuration options match WT_SESSION::log_flush. Change the - default WT_SESSION::transaction_sync timeout to 20 minutes rather than infinity. -* 21b8330 WT-2151: Enhance logging configuration to allow reconfiguration and add a new - "log=(zero_fill)" configuration option that causes WiredTiger to zero-fill log files on creation. -* 368b307 WT-2200: Add a new configuration option to ::wiredtiger_open called "write_through" that - causes WiredTiger to specify the FILE_FLAG_WRITE_THROUGH on Windows when writing files (default - false, including when "direct_io" is configured). -* 08c0fcd WT-2217: After a successful call to WT_CURSOR::insert, the key and value will be - cleared from the cursor. See the upgrading documentation for more information. -* d4fc69a SERVER-17078: Add a "statistics=(size)" mode to statistics cursors, which allows for - retrieving file size only. -* b83b901 SERVER-18356: Changed the handling of the "config_base" option to ::wiredtiger_open. See - upgrading documentation for more information. +* WT-147 Create indexes on non-empty tables. +* WT-1315 Add an implementation of cursor joins via a new WT_SESSION::join API. +* WT-1350 Add a new configuration option to ::wiredtiger_open and + WT_CONNECTION::reconfigure called "eviction_dirty_trigger" that causes eviction to start + evicting dirty pages from cache once the given threshold has been reached. +* WT-1728 Add a WT_SESSION::reset method to release resources held by a session. 
+* WT-1930 Allow setting "file_manager=(close_idle_time=0)" to ::wiredtiger_open and + WT_CONNECTION::reconfigure to disable closing idle handles. +* WT-1959 Change verify to distinguish between warnings and errors. Add a new strict mode + to verify that causes warnings to be reported as errors. Use strict mode to match earlier + behavior. See the upgrading documentation for more information. +* WT-1980 Add a new "metadata:create" URI to WT_SESSION::open_cursor for metadata cursors + that return strings useful for passing to WT_SESSION::create. +* WT-2065 Add a new configuration option to ::wiredtiger_open and + WT_CONNECTION::reconfigure called "shared_cache=(quota)" that limits the amount of shared + cache a participant can be assigned. +* WT-2104 Add a method to flush log files via a new WT_SESSION::log_flush API. Made + WT_SESSION::commit_transaction configuration options match WT_SESSION::log_flush. Change + the default WT_SESSION::transaction_sync timeout to 20 minutes rather than infinity. +* WT-2151 Enhance logging configuration to allow reconfiguration and add a new "log=(zero_fill)" + configuration option that causes WiredTiger to zero-fill log files on creation. +* WT-2200 Add a new configuration option to ::wiredtiger_open called "write_through" that + causes WiredTiger to specify the FILE_FLAG_WRITE_THROUGH on Windows when writing files + (default false, including when "direct_io" is configured). +* WT-2217 After a successful call to WT_CURSOR::insert, the key and value will be + cleared from the cursor. See the upgrading documentation for more information. +* SERVER-17078 Add a "statistics=(size)" mode to statistics cursors, which allows for + retrieving file size only. +* SERVER-18356 Changed the handling of the "config_base" option to ::wiredtiger_open. See + upgrading documentation for more information. The following statistics were removed: -* f1ed3b9 WT-1481: connection dhandles swept. -* f1ed3b9 WT-1481: connection candidate referenced. 
-* 4ba4518 WT-1481: failed to find a slot large enough for record. -* 28563af WT-1989: log buffer size increases. -* f81c70d WT-1989: slots selected for switching that were unavailable. -* df4f69c WT-2094: log records written directly. -* df4f69c WT-2094: record size exceeded maximum. -* d68e078 WT-2182: pages split during eviction. +* WT-1481 connection dhandles swept. +* WT-1481 connection candidate referenced. +* WT-1481 failed to find a slot large enough for record. +* WT-1989 log buffer size increases. +* WT-1989 slots selected for switching that were unavailable. +* WT-2094 log records written directly. +* WT-2094 record size exceeded maximum. +* WT-2182 pages split during eviction. Lookaside table: -* 6a5a461 WT-1967: Allow eviction of updates required by old readers. -* 87592ec WT-2074: Fix a race between lookaside table reconciliation and checkpoints. -* 0390b29 WT-2149: Fix the order of creation of the lookaside table. -* 7518a69 WT-2190: Fix transaction visibility test that is applied to the lookaside table. -* 2cf57a6 SERVER-21585: Don't use the lookaside file until the cache is stuck full. +* WT-1967 Allow eviction of updates required by old readers. +* WT-2074 Fix a race between lookaside table reconciliation and checkpoints. +* WT-2149 Fix the order of creation of the lookaside table. +* WT-2190 Fix transaction visibility test that is applied to the lookaside table. +* SERVER-21585 Don't use the lookaside file until the cache is stuck full. Issues fixed in MongoDB: -* d57dc26 SERVER-18829: Have pages start in the middle of the LRU queue for eviction. -* b847ccc SERVER-18838: During drops, don't remove files until the metadata is durable. -* 8f7da9a SERVER-18875: Clean up deleted pages. -* d04083d SERVER-18899: Add unit test to simulate fsyncLock. -* 3ec45a7 SERVER-19340: Avoid type aliasing in the random number generator. -* 907c0ca SERVER-19445: Have the oldest transaction update the oldest tracked ID. 
-* fb8739f SERVER-19522: Try to evict internal pages with no useful child pages. -* 4545a8b SERVER-19573: Change row-store inserts to avoid page locking. -* b52d2d3 SERVER-19751: Retry pthread_create on EAGAIN or EINTR. -* 46b4ad5 SERVER-19954: Don't scan tracked handles during checkpoints. -* 65abd20 SERVER-19989: Add a write barrier before data handles are added to shared lists. -* 3e46e79 SERVER-19990: Don't assert on eviction of live updates from dead trees. -* 38dad39 SERVER-20008: Don't reset eviction walks when hitting a busy page. -* 3b72361 SERVER-20159: Make all readers wait while the cache is full. -* 8be547b SERVER-20193: Fix obsolete transaction check. -* ad56c6a SERVER-20303: Tune in-memory splits when inserting large objects. -* 7505a02 SERVER-20385: Make WT_CURSOR::next(random) more random. -* 35d46c3 SERVER-21027: Reverse split if there are many deleted pages. -* a6da10e SERVER-21553: Enable fast-path truncate after splits. -* 890ee34 SERVER-21619: Don't do internal page splits after a tree is marked DEAD. -* 0e93d60 SERVER-21691: Avoid insert stalls. +* SERVER-18829 Have pages start in the middle of the LRU queue for eviction. +* SERVER-18838 During drops, don't remove files until the metadata is durable. +* SERVER-18875 Clean up deleted pages. +* SERVER-18899 Add unit test to simulate fsyncLock. +* SERVER-19340 Avoid type aliasing in the random number generator. +* SERVER-19445 Have the oldest transaction update the oldest tracked ID. +* SERVER-19522 Try to evict internal pages with no useful child pages. +* SERVER-19573 Change row-store inserts to avoid page locking. +* SERVER-19751 Retry pthread_create on EAGAIN or EINTR. +* SERVER-19954 Don't scan tracked handles during checkpoints. +* SERVER-19989 Add a write barrier before data handles are added to shared lists. +* SERVER-19990 Don't assert on eviction of live updates from dead trees. +* SERVER-20008 Don't reset eviction walks when hitting a busy page. 
+* SERVER-20159 Make all readers wait while the cache is full. +* SERVER-20193 Fix obsolete transaction check. +* SERVER-20303 Tune in-memory splits when inserting large objects. +* SERVER-20385 Make WT_CURSOR::next(random) more random. +* SERVER-21027 Reverse split if there are many deleted pages. +* SERVER-21553 Enable fast-path truncate after splits. +* SERVER-21619 Don't do internal page splits after a tree is marked DEAD. +* SERVER-21691 Avoid insert stalls. Other note worthy changes since the previous release: -* bc2aa57 WT-1744: Throttle worker threads based on eviction targets. -* 55a989e WT-1845: Allow read only transactions to commit after failure. -* df625dc WT-1869: Avoid doing in memory splits while checkpointing a tree. -* ddac54f WT-1942: Add atomic implementations for PPC64 architecture. -* 3866fa6 WT-1962: Make the hot_backup_lock a read/write lock. -* 58f9e99 WT-1963: Fix backup cursor Java API. -* 4e0fe59 WT-1964: Fix a bug in the Java API when closing handles from a different thread. -* 60e2150 WT-1966: Change how the shared cache assigns priority to participants. -* 76d2e73 WT-1975: Ensure previous log files are complete for forced sync. -* e43b22a WT-1977: Improve performance of getting snapshots with many sessions. -* 5eaf63e WT-1978: Better checking and tests for index cursor comparison. -* 1602a4b WT-1981: Fix a signed 32-bit integer unpacking bug. -* cd1704d WT-1982: Fix a bug where cached overflow items were freed too early. -* 57a9f38 WT-1985: Integer packing and other fixes for Python and Java. -* 9897eb2 WT-1986: Fix a race renaming temporary log files. -* b10bff9 WT-1989: Improve scalability of log writes. -* f8dc12b WT-1996: Fix a bug where we would free the fist update during a page rewrite on error. -* 144a383 WT-1998: Fixes for indexes with some rarely used key/value formats. -* 8af8b8a WT-2002: Fix a bug in verify where it would panic when encountering a corrupted file. 
-* e1d8bc7 WT-2007: Statically allocate log slot buffers to a maximum size. -* 911158c WT-2008: Fix a bug in recovery where a file create went missing. -* 3e2e7e6 WT-2009: Apply tracked metadata operations post-commit. -* 1255cb2 WT-2012: Fix a bug updating the oldest ID. -* ef9d56f WT-2013: Add gcc asm definitions for ARM64. -* c8633e6 WT-2014: Fix a bug in checkpoints where files could be flushed in the wrong order. -* 9b09e69 WT-2015: Fix a bug in error handling during block open. -* 4938b8d WT-2017: Once an eviction server thread is started keep it running. -* 298f86c WT-2019: Fix a logic bug tracking the maximum transaction ID in clean trees. -* 7d6075c WT-2020: Clarify checksum error failure messages. -* 7b302d3 WT-2021: Fix a bug moving the oldest ID forward (introduced by WT-1967). -* 9df72d7 WT-2022: Fix a bug not releasing a handle when opening a non-existent index cursor. -* 81ffc2d WT-2023: Improve locking primitives: simplify read-write lock operations. -* 6b84722 WT-2029: Improve scalability of statistics. -* f97cfe9 WT-2031: Log slot revamp. -* bee11c3 WT-2032: Improve next_random cursors to work with small trees. -* cf53696 WT-2034: Improve shared cache balancing algorithm. -* aee1c94 WT-2035: For index cursors, keep track of which column groups need to be positioned. -* 36310d4 WT-2036: Make handle sweeps more robust. -* c948fbb WT-2037: Only write a checkpoint to the log on close if it wasn't. -* e25e615 WT-2038: Avoid long scans holding the handle list lock. -* 75a4655 WT-2039: Add error check and unit test for log records over 4 GB. -* 5ab26af WT-2042: Only try to evict tombstones that are visible to all readers. -* ce223ac WT-2045: Don't let the eviction server do slow reconciliation, it can stall eviction. -* 6665618 WT-2046: Add a statistic for search restarts. -* 98b4a28 WT-2047: Fix a bug in the random generator code to handle an uninitialized state. -* 258e2e1 WT-2050: Show size with memory allocation errors. 
-* 2e1471c WT-2053: Fix a bug in disk verify messages. -* e316e61 WT-2056: Reorder btree cursor close so stats are maintained correctly. -* 70f9100 WT-2057: Remove the verbose configuration when writing the base configuration file. -* 41b6fb8 WT-2058: Fix an alignment bug in the mutex and log-slot code. -* d72012b WT-2059: Include non-aggregated stats in cursor results. -* 3e0c7bf WT-2062: Try harder to make progress on in-memory splits. -* 66757f7 WT-2064: Don't spin indefinitely waiting for the handle list lock in eviction. -* 8f42f02 WT-2066: Update the oldest transaction ID from eviction. -* e167592 WT-2068: Protect discarding handles with the handle list lock. -* fd72a09 WT-2075: Fix a hang in logging with parallel workload. -* 11c0fa0 WT-2078: Fix a bug in error handling with statistics cursors. -* 9734d85 WT-2081: Make verify progress reporting less verbose. -* 6008b41 WT-2085: Run some of the log_server threads operations more frequently. -* 39a69ec WT-2086: Add a statistic to track when eviction finds a page that can be split. -* 334e103 WT-2089: Relax restrictions on multiblock eviction and in-memory splits. -* f13b788 WT-2090: Fix a bug in the Windows OS layer that swallowed error returns. -* 83b8db7 WT-2092: Free log condition variables after all threads are joined. -* d9391c0 WT-2093: Use the C99 bool type to clarify when functions return true/false. -* f883d27 WT-2094: Eliminate direct write and record unbuffered log records. -* 9008260 WT-2097: Reintroduce immediate waits when forced eviction is necessary. -* ff1da28 WT-2100: Rename evict to evict_queue so it's easier to search for. -* 41db2ee WT-2101: Don't update the logging ckpt_lsn on clean shutdown. -* e1d6886 WT-2102: Fix a hang in log slot join when forcing log writes. -* 0e96683 WT-2105: Fix a bug where we could reference an invalid memory address if a file is - corrupted on disk. -* 6a565bc WT-2108: Rework in-memory page rewrite support (WT_PM_REC_REWRITE). 
-* dcb0ddb WT-2114: Make application eviction fairer. -* 10c2f15 WT-2115: Don't skip truncated pages that are part of a checkpoint. -* cd6ce97 WT-2116: Add diagnostic checks for stuck cache and dump the state. -* 51cf672 WT-2119: Don't evict clean multiblock pages with overflow items during checkpoints. -* 346ad40 WT-2126: Clean up if there is an error during splits. -* 6831485 WT-2127: Deepen the tree more regularly to avoid wide internal pages. -* a0b5d2b WT-2128: When decoding huffman encoding during salvage it's possible to have fewer bits - than the symbol length during decoding, if the value has been corrupted. -* 79f74e5 WT-2131: Switch to using a lock to control page splits to avoid starvation. -* 02a3d9f WT-2132: Make debug dump function more robust to errors. -* 8c223e4 WT-2134: Flush all buffered log records in log_flush. -* d1b5e7f WT-2135: Fix log_only setting for backup cursor. Fix initialization. -* aab8101 WT-2137: Check the sync_lsn is in the correct file before moving it forward. -* 323af84 WT-2139: Fix a transaction visibility bug in read-uncommitted transactions. -* 751c628 WT-2146: Improve performance when searching for short keys. -* 62998ce WT-2148: Fix a compiler warning in encoding functions. -* 6c16fdd WT-2153: Fix bug. Now we always need to start the log_server thread. -* 6a5fca3 WT-2154: Make btree dump safer. -* 0d74bc6 WT-2155: Remove last use of F_CAS_ATOMIC and the associated macro. -* cc42bda WT-2156: Allow eviction workers to restart. -* bf1d359 WT-2157: Fix a bug where a failed page split could lead to incomplete checkpoints. -* ce9d265 WT-2159: Don't check the config twice in one path. -* 544f27d WT-2162: Add null pointer check, needed after an index is dropped. -* 0d85ebe WT-2164: Prevent another LSM chunk checkpoint while the first is still in progress. -* a81aae8 WT-2165: Stop using FALLOC_FL_KEEP_SIZE flag when pre-allocating files. -* 2865a76 WT-2167: Switch recovery to using an internal session. 
-* 5d4c952 WT-2170: Protect the turtle file with a lock. -* 497b744 WT-2174: Avoid the table list lock when creating a size only statistics cursor. -* fdfa804 WT-2178: In-memory storage engine support. -* b9bd01f WT-2179: Added decorator to mark txn13 as part of the --long test suite. -* be544dd WT-2180: Remove cursor.{search,search-near,remove} key size validation. -* be412b5 WT-2182: When internal pages grow large enough, split them into their parents. -* c27e78e WT-2184: Fix log scan bug when final record has many trailing zeros. -* 9584be3 WT-2185: Don't do reverse splits when closing a file. -* f6b12d3 WT-2187: Add flag for flushing a slot. -* a4545bf WT-2189: Update flag set and clear macros to be less error prone. -* 30ab327 WT-2191: In-memory disk image no longer the same as saved updates. -* 4ba5698 WT-2192: Fix the logic around checking whether internal page is evictable. -* 2f0b3e2 WT-2193: Handle read-committed metadata checkpoints during snapshot transactions. -* 9b1febc WT-2194: Java close callbacks should handle cursors that Java code did not open. -* 438f455 WT-2195: Fix a hang after giving up on a reverse split. -* ff27fe9 WT-2196: Fix error handling in size only statistics. -* 0a1ee34 WT-2199: Fix transaction sync inconsistency. -* 2ff1fd6 WT-2203: Release an allocated page on error. -* 3b3cf2a WT-2204: Don't take a local copy of page->modify until we know the page is dirty. -* 179d4d0 WT-2206: Change cache operations from flags to an enumeration. -* 82514ca WT-2207: Track whenever a session has a handle exclusive. -* 78bd4ac WT-2210: Raw compression fails if row-store recovery precedes column-store recovery. -* c360d53 WT-2212: Add a "use_environment" config to ::wiredtiger_open. -* a72ddb7 WT-2218: Add truncate stats. -* ce8c091 WT-2219: Enhancements to in-memory testing. -* e2f1130 WT-2220: Update time comparison macros. -* 59857f9 WT-2222: Add statistics for named snapshots. 
-* fb9cebe WT-2224: Track which deleted refs are discarded by a split. -* cace179 WT-2228: Avoid unnecessary raw-compression calls. -* 0a52a80 WT-2237: Have threads publish unique transaction IDs so that updates always become - visible immediately on commit. -* 6c7338f WT-2241: Use a lock to protect transaction ID allocation. -* 39dfd21 WT-2243: Don't keep transaction IDs pinned for reading from checkpoints. -* 4c49948 WT-2244: Trigger in-memory splits sooner. -* 9f2e4f3 WT-2248: WT_SESSION::close is updating WT_CONNECTION_IMPL.default_session. -* 264ec21 WT-2249: Keep eviction stuck until cache usage is under 100%. -* dca1411 WT-2250: Minor fix. Use SET instead of increment for stat. -* e731ef8 WT-2251: Free addresses when we discard deleted page references. -* 4fc3e39 WT-2253: Evict pages left behind by in-memory splits. -* 2df5658 WT-2257: Fixes when given multiple thread workload configurations. -* 4c49043 WT-2260: Avoid adding internal pages to the eviction queue +* WT-1744 Throttle worker threads based on eviction targets. +* WT-1845 Allow read only transactions to commit after failure. +* WT-1869 Avoid doing in memory splits while checkpointing a tree. +* WT-1942 Add atomic implementations for PowerPC architecture. +* WT-1962 Make the hot_backup_lock a read/write lock. +* WT-1963 Fix backup cursor Java API. +* WT-1964 Fix a bug in the Java API when closing handles from a different thread. +* WT-1966 Change how the shared cache assigns priority to participants. +* WT-1975 Ensure previous log files are complete for forced sync. +* WT-1977 Improve performance of getting snapshots with many sessions. +* WT-1978 Better checking and tests for index cursor comparison. +* WT-1981 Fix a signed 32-bit integer unpacking bug. +* WT-1982 Fix a bug where cached overflow items were freed too early. +* WT-1985 Integer packing and other fixes for Python and Java. +* WT-1986 Fix a race renaming temporary log files. +* WT-1989 Improve scalability of log writes. 
+* WT-1996 Fix a bug where we would free the first update during a page rewrite on error. +* WT-1998 Fixes for indexes with some rarely used key/value formats. +* WT-2002 Fix a bug in verify where it would panic when encountering a corrupted file. +* WT-2007 Statically allocate log slot buffers to a maximum size. +* WT-2008 Fix a bug in recovery where a file create went missing. +* WT-2009 Apply tracked metadata operations post-commit. +* WT-2012 Fix a bug updating the oldest ID. +* WT-2013 Add gcc asm definitions for ARM64. +* WT-2014 Fix a bug in checkpoints where files could be flushed in the wrong order. +* WT-2015 Fix a bug in error handling during block open. +* WT-2017 Once an eviction server thread is started keep it running. +* WT-2019 Fix a logic bug tracking the maximum transaction ID in clean trees. +* WT-2020 Clarify checksum error failure messages. +* WT-2021 Fix a bug moving the oldest ID forward (introduced by WT-1967). +* WT-2022 Fix a bug not releasing a handle when opening a non-existent index cursor. +* WT-2023 Improve locking primitives: simplify read-write lock operations. +* WT-2029 Improve scalability of statistics. +* WT-2031 Log slot revamp. +* WT-2032 Improve next_random cursors to work with small trees. +* WT-2034 Improve shared cache balancing algorithm. +* WT-2035 For index cursors, keep track of which column groups need to be positioned. +* WT-2036 Make handle sweeps more robust. +* WT-2037 Only write a checkpoint to the log on close if it wasn't. +* WT-2038 Avoid long scans holding the handle list lock. +* WT-2039 Add error check and unit test for log records over 4 GB. +* WT-2042 Only try to evict tombstones that are visible to all readers. +* WT-2045 Don't let the eviction server do slow reconciliation, it can stall eviction. +* WT-2046 Add a statistic for search restarts. +* WT-2047 Fix a bug in the random generator code to handle an uninitialized state. +* WT-2050 Show size with memory allocation errors. 
+* WT-2053 Fix a bug in disk verify messages. +* WT-2056 Reorder btree cursor close so stats are maintained correctly. +* WT-2057 Remove the verbose configuration when writing the base configuration file. +* WT-2058 Fix an alignment bug in the mutex and log-slot code. +* WT-2059 Include non-aggregated stats in cursor results. +* WT-2062 Try harder to make progress on in-memory splits. +* WT-2064 Don't spin indefinitely waiting for the handle list lock in eviction. +* WT-2066 Update the oldest transaction ID from eviction. +* WT-2068 Protect discarding handles with the handle list lock. +* WT-2075 Fix a hang in logging with parallel workload. +* WT-2078 Fix a bug in error handling with statistics cursors. +* WT-2081 Make verify progress reporting less verbose. +* WT-2085 Run some of the log_server threads operations more frequently. +* WT-2086 Add a statistic to track when eviction finds a page that can be split. +* WT-2089 Relax restrictions on multiblock eviction and in-memory splits. +* WT-2090 Fix a bug in the Windows OS layer that swallowed error returns. +* WT-2092 Free log condition variables after all threads are joined. +* WT-2093 Use the C99 bool type to clarify when functions return true/false. +* WT-2094 Eliminate direct write and record unbuffered log records. +* WT-2097 Reintroduce immediate waits when forced eviction is necessary. +* WT-2100 Rename evict to evict_queue so it's easier to search for. +* WT-2101 Don't update the logging ckpt_lsn on clean shutdown. +* WT-2102 Fix a hang in log slot join when forcing log writes. +* WT-2105 Fix a bug where we could reference an invalid memory address if a file is corrupted on disk. +* WT-2108 Rework in-memory page rewrite support (WT_PM_REC_REWRITE). +* WT-2114 Make application eviction fairer. +* WT-2115 Don't skip truncated pages that are part of a checkpoint. +* WT-2116 Add diagnostic checks for stuck cache and dump the state. 
+* WT-2119 Don't evict clean multiblock pages with overflow items during checkpoints. +* WT-2126 Clean up if there is an error during splits. +* WT-2127 Deepen the tree more regularly to avoid wide internal pages. +* WT-2128 When decoding huffman encoding during salvage it's possible to have fewer bits than the + symbol length during decoding, if the value has been corrupted. +* WT-2131 Switch to using a lock to control page splits to avoid starvation. +* WT-2132 Make debug dump function more robust to errors. +* WT-2134 Flush all buffered log records in log_flush. +* WT-2135 Fix log_only setting for backup cursor. Fix initialization. +* WT-2137 Check the sync_lsn is in the correct file before moving it forward. +* WT-2139 Fix a transaction visibility bug in read-uncommitted transactions. +* WT-2146 Improve performance when searching for short keys. +* WT-2148 Fix a compiler warning in encoding functions. +* WT-2153 Fix bug. Now we always need to start the log_server thread. +* WT-2154 Make btree dump safer. +* WT-2155 Remove last use of F_CAS_ATOMIC and the associated macro. +* WT-2156 Allow eviction workers to restart. +* WT-2157 Fix a bug where a failed page split could lead to incomplete checkpoints. +* WT-2159 Don't check the config twice in one path. +* WT-2162 Add null pointer check, needed after an index is dropped. +* WT-2164 Prevent another LSM chunk checkpoint while the first is still in progress. +* WT-2165 Stop using FALLOC_FL_KEEP_SIZE flag when pre-allocating files. +* WT-2167 Switch recovery to using an internal session. +* WT-2170 Protect the turtle file with a lock. +* WT-2174 Avoid the table list lock when creating a size only statistics cursor. +* WT-2178 In-memory storage engine support. +* WT-2179 Added decorator to mark txn13 as part of the --long test suite. +* WT-2180 Remove cursor.{search,search-near,remove} key size validation. +* WT-2182 When internal pages grow large enough, split them into their parents. 
+* WT-2184 Fix log scan bug when final record has many trailing zeros. +* WT-2185 Don't do reverse splits when closing a file. +* WT-2187 Add flag for flushing a slot. +* WT-2189 Update flag set and clear macros to be less error prone. +* WT-2191 In-memory disk image no longer the same as saved updates. +* WT-2192 Fix the logic around checking whether internal page is evictable. +* WT-2193 Handle read-committed metadata checkpoints during snapshot transactions. +* WT-2194 Java close callbacks should handle cursors that Java code did not open. +* WT-2195 Fix a hang after giving up on a reverse split. +* WT-2196 Fix error handling in size only statistics. +* WT-2199 Fix transaction sync inconsistency. +* WT-2203 Release an allocated page on error. +* WT-2204 Don't take a local copy of page->modify until we know the page is dirty. +* WT-2206 Change cache operations from flags to an enumeration. +* WT-2207 Track whenever a session has a handle exclusive. +* WT-2210 Raw compression fails if row-store recovery precedes column-store recovery. +* WT-2212 Add a "use_environment" config to ::wiredtiger_open. +* WT-2218 Add truncate stats. +* WT-2219 Enhancements to in-memory testing. +* WT-2220 Update time comparison macros. +* WT-2222 Add statistics for named snapshots. +* WT-2224 Track which deleted refs are discarded by a split. +* WT-2228 Avoid unnecessary raw-compression calls. +* WT-2237 Have threads publish unique transaction IDs so that updates always become visible + immediately on commit. +* WT-2241 Use a lock to protect transaction ID allocation. +* WT-2243 Don't keep transaction IDs pinned for reading from checkpoints. +* WT-2244 Trigger in-memory splits sooner. +* WT-2248 WT_SESSION::close is updating WT_CONNECTION_IMPL.default_session. +* WT-2249 Keep eviction stuck until cache usage is under 100%. +* WT-2250 Minor fix. Use SET instead of increment for stat. +* WT-2251 Free addresses when we discard deleted page references. 
+* WT-2253 Evict pages left behind by in-memory splits. +* WT-2257 Fixes when given multiple thread workload configurations. +* WT-2260 Avoid adding internal pages to the eviction queue WiredTiger release 2.6.1, 2015-05-13 ------------------------------------ @@ -237,7 +392,7 @@ New features: refs WT-1908 * Add the ability to flag a transaction to be flushed asynchronously on - commit via a new sync=[background] configuration option. Add a new + commit via a new sync=[background] configuration option. Add a new WT_SESSION::transaction_sync API to wait for asynchronous flushes to complete. refs WT-1908, #1943 @@ -921,7 +1076,7 @@ New features and API changes: * Add new custom extractor functionality to WiredTiger indexes. Allowing an application to define mutated and/or multiple keys for indexes. [#1199] - + * Add a new WT_SESSION::transaction_pinned_range method that allows users to identify when a session is keeping a transaction ID pinned for a long time. [#1314] @@ -1418,7 +1573,7 @@ Significant changes include: [#756, #761] * WiredTiger statistics have been significantly improved: - + Statistics logging has been changed to aggregate information from all open handles. [#709, #717] @@ -1765,7 +1920,7 @@ below: * Make run-time statistics optional, defaulted to "off". * Change how we detect if shared cache is used. It used to rely on a name, - now it will be used if the shared_cache configuration option is included. + now it will be used if the shared_cache configuration option is included. * Add the ability to specify a per-connection reserved size for cache pools. Ensure cache pool reconfiguration is honoured quickly. @@ -1858,7 +2013,7 @@ changes are highlighted below: [392] Move examples/c/ex_test_perf.c to bench/wtperf. [322] Add support for statistics on schema-level objects i.e tables, - column groups, indices. + column groups, indices. * Enhance statistics, including changing the name of some statistics. @@ -2352,9 +2507,9 @@ upgrade. 
Here is the full list of changes: [#262] Disable dump on child cursors: only the top-level cursor is wrapped in a dump cursor. -[#266] Deal with new / dropped indices in __wt_schema_open_index. +[#266] Deal with new / dropped indices in __wt_schema_open_index. -[#269] Checkpoint handles must not be open when they are overwritten. +[#269] Checkpoint handles must not be open when they are overwritten. [#271] Add support for a reserved checkpoint name "WiredTigerCheckpoint" that opens the object's last checkpoint. @@ -2365,18 +2520,18 @@ upgrade. Here is the full list of changes: cursor equality result in a separate argument. [#275] If exclusive handle is required for an operation and it is not - available, fail immediately: don't block. + available, fail immediately: don't block. [#276] Fix methods that return integer parameters from Python. This - includes cursor.equals and cursor.search_near. + includes cursor.equals and cursor.search_near. [#277] Acquire the schema lock when creating the metadata file. We're single-threaded, so it isn't protecting against anything, but the - handle management code expects to have the schema lock. + handle management code expects to have the schema lock. [#279] Some optimizations for __wt_config_gets_defno. Specifically, if we're dealing with a simple stack of config strings, just parse the - application string rather than the full list of defaults. + application string rather than the full list of defaults. [#279] Split the description string into a set of structures, to reduce the number of string comparisons and manipulation that's required. @@ -2403,19 +2558,19 @@ upgrade. Here is the full list of changes: [#294] If txn_commit fails, document the transaction was rolled-back. [#295] Expand the documentation on using cursors without explicit - transactions. + transactions. [#300] Include all changes whenever closing a file, don't check for - visibility. If updates are skipped while evicting a page, give up. + visibility. 
If updates are skipped while evicting a page, give up. [#305] Have "wt dump" fail more gracefully if the object doesn't exist. [#310] When freeing a tracked address in reconciliation, clear it to avoid - freeing the same address again on error. + freeing the same address again on error. [#314] Replace cursor.equals with cursor.compare -[#319] Clear the bulk_load_ok flag when closing handles. +[#319] Clear the bulk_load_ok flag when closing handles. * Add an "ancient transaction" statistic so we can find out if they're @@ -2610,7 +2765,7 @@ Fix the "exclusive" config for WT_SESSION::create. [#181] Fix an eviction bug introduced into 1.1.2: when evicting a page with children, remove the children from the LRU eviction queue. Reduce the impact of clearing a page from the LRU queue by marking pages on the queue with a flag -(WT_PAGE_EVICT_LRU). +(WT_PAGE_EVICT_LRU). During an eviction walk, pin pages up to the root so there is no need to spin when attempting to lock a parent page. Use the EVICT_LRU page flag to avoid diff --git a/src/third_party/wiredtiger/README b/src/third_party/wiredtiger/README index 07dde47feaf..32549d099d5 100644 --- a/src/third_party/wiredtiger/README +++ b/src/third_party/wiredtiger/README @@ -1,6 +1,6 @@ -WiredTiger 2.7.1: (December 8, 2015) +WiredTiger 2.8.1: (March 24, 2016) -This is version 2.7.1 of WiredTiger. +This is version 2.8.1 of WiredTiger. 
WiredTiger release packages and documentation can be found at: @@ -8,7 +8,7 @@ WiredTiger release packages and documentation can be found at: The documentation for this specific release can be found at: - http://source.wiredtiger.com/2.7.1/index.html + http://source.wiredtiger.com/2.8.1/index.html The WiredTiger source code can be found at: diff --git a/src/third_party/wiredtiger/RELEASE_INFO b/src/third_party/wiredtiger/RELEASE_INFO index d2c7995910e..d29c29b554c 100644 --- a/src/third_party/wiredtiger/RELEASE_INFO +++ b/src/third_party/wiredtiger/RELEASE_INFO @@ -1,5 +1,5 @@ WIREDTIGER_VERSION_MAJOR=2 -WIREDTIGER_VERSION_MINOR=7 +WIREDTIGER_VERSION_MINOR=8 WIREDTIGER_VERSION_PATCH=1 WIREDTIGER_VERSION="$WIREDTIGER_VERSION_MAJOR.$WIREDTIGER_VERSION_MINOR.$WIREDTIGER_VERSION_PATCH" diff --git a/src/third_party/wiredtiger/bench/wtperf/misc.c b/src/third_party/wiredtiger/bench/wtperf/misc.c index bdfd53d5295..2821216f240 100644 --- a/src/third_party/wiredtiger/bench/wtperf/misc.c +++ b/src/third_party/wiredtiger/bench/wtperf/misc.c @@ -54,7 +54,7 @@ setup_log_file(CONFIG *cfg) return (ret); /* Use line buffering for the log file. */ - (void)setvbuf(cfg->logf, NULL, _IOLBF, 32); + __wt_stream_set_line_buffer(cfg->logf); return (0); } diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf.c b/src/third_party/wiredtiger/bench/wtperf/wtperf.c index 340c400ba7e..9d57bdcf6b0 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf.c +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf.c @@ -1233,7 +1233,7 @@ monitor(void *arg) goto err; } /* Set line buffering for monitor file. */ - (void)setvbuf(fp, NULL, _IOLBF, 0); + __wt_stream_set_line_buffer(fp); fprintf(fp, "#time," "totalsec," @@ -2313,7 +2313,7 @@ main(int argc, char *argv[]) cfg->table_name); /* Make stdout line buffered, so verbose output appears quickly. */ - (void)setvbuf(stdout, NULL, _IOLBF, 32); + __wt_stream_set_line_buffer(stdout); /* Concatenate non-default configuration strings. 
*/ if (cfg->verbose > 1 || user_cconfig != NULL || diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c b/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c index bba1f629715..a98fd9b18d7 100644 --- a/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c +++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_throttle.c @@ -104,10 +104,9 @@ worker_throttle(CONFIG_THREAD *thread) */ WT_RET(__wt_epoch(NULL, &throttle_cfg->last_increment)); } else { - throttle_cfg->ops_count = - (uint64_t) (float)(usecs_delta / - throttle_cfg->usecs_increment) * - (float)throttle_cfg->ops_per_increment; + throttle_cfg->ops_count = (usecs_delta * + throttle_cfg->ops_per_increment) / + throttle_cfg->usecs_increment; throttle_cfg->last_increment = now; } diff --git a/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4 b/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4 index 6c7c6eed9cf..997f571782c 100644 --- a/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4 +++ b/src/third_party/wiredtiger/build_posix/aclocal/version-set.m4 @@ -1,14 +1,14 @@ dnl build by dist/s_version VERSION_MAJOR=2 -VERSION_MINOR=7 +VERSION_MINOR=8 VERSION_PATCH=1 -VERSION_STRING='"WiredTiger 2.7.1: (December 8, 2015)"' +VERSION_STRING='"WiredTiger 2.8.1: (March 24, 2016)"' AC_SUBST(VERSION_MAJOR) AC_SUBST(VERSION_MINOR) AC_SUBST(VERSION_PATCH) AC_SUBST(VERSION_STRING) -VERSION_NOPATCH=2.7 +VERSION_NOPATCH=2.8 AC_SUBST(VERSION_NOPATCH) diff --git a/src/third_party/wiredtiger/build_posix/aclocal/version.m4 b/src/third_party/wiredtiger/build_posix/aclocal/version.m4 index 3b690982f9d..0e199cb9546 100644 --- a/src/third_party/wiredtiger/build_posix/aclocal/version.m4 +++ b/src/third_party/wiredtiger/build_posix/aclocal/version.m4 @@ -1,2 +1,2 @@ dnl WiredTiger product version for AC_INIT. 
Maintained by dist/s_version -2.7.1 +2.8.1 diff --git a/src/third_party/wiredtiger/build_win/filelist.win b/src/third_party/wiredtiger/build_win/filelist.win index b6a9caf4a74..c370303d5f8 100644 --- a/src/third_party/wiredtiger/build_win/filelist.win +++ b/src/third_party/wiredtiger/build_win/filelist.win @@ -101,33 +101,28 @@ src/meta/meta_ext.c src/meta/meta_table.c src/meta/meta_track.c src/meta/meta_turtle.c -src/os_posix/os_abort.c -src/os_posix/os_alloc.c -src/os_posix/os_getline.c -src/os_posix/os_getopt.c -src/os_posix/os_mtx_rw.c -src/os_posix/os_stdio.c -src/os_posix/os_strtouq.c +src/os_common/filename.c +src/os_common/os_abort.c +src/os_common/os_alloc.c +src/os_common/os_fhandle.c +src/os_common/os_fs_inmemory.c +src/os_common/os_fs_stdio.c +src/os_common/os_getline.c +src/os_common/os_getopt.c +src/os_common/os_init.c +src/os_common/os_strtouq.c src/os_win/os_dir.c src/os_win/os_dlopen.c src/os_win/os_errno.c -src/os_win/os_exist.c -src/os_win/os_fallocate.c -src/os_win/os_filesize.c -src/os_win/os_flock.c -src/os_win/os_fsync.c -src/os_win/os_ftruncate.c +src/os_win/os_fs.c src/os_win/os_getenv.c src/os_win/os_map.c src/os_win/os_mtx_cond.c src/os_win/os_once.c -src/os_win/os_open.c src/os_win/os_pagesize.c src/os_win/os_path.c src/os_win/os_priv.c -src/os_win/os_remove.c -src/os_win/os_rename.c -src/os_win/os_rw.c +src/os_win/os_setvbuf.c src/os_win/os_sleep.c src/os_win/os_snprintf.c src/os_win/os_thread.c @@ -158,13 +153,13 @@ src/support/cksum.c src/support/cond_auto.c src/support/crypto.c src/support/err.c -src/support/filename.c src/support/global.c src/support/hash_city.c src/support/hash_fnv.c src/support/hazard.c src/support/hex.c src/support/huffman.c +src/support/mtx_rw.c src/support/pow.c src/support/rand.c src/support/scratch.c diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 02aee1e8825..5ca294a5d60 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ 
b/src/third_party/wiredtiger/dist/api_data.py @@ -569,6 +569,7 @@ connection_runtime_config = [ 'evict', 'evictserver', 'fileops', + 'handleops', 'log', 'lsm', 'lsm_manager', diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist index 350e0c50087..1d7ffa76922 100644 --- a/src/third_party/wiredtiger/dist/filelist +++ b/src/third_party/wiredtiger/dist/filelist @@ -101,34 +101,30 @@ src/meta/meta_ext.c src/meta/meta_table.c src/meta/meta_track.c src/meta/meta_turtle.c -src/os_posix/os_abort.c -src/os_posix/os_alloc.c +src/os_common/filename.c +src/os_common/os_abort.c +src/os_common/os_alloc.c +src/os_common/os_fhandle.c +src/os_common/os_fs_inmemory.c +src/os_common/os_fs_stdio.c +src/os_common/os_getline.c +src/os_common/os_getopt.c +src/os_common/os_init.c +src/os_common/os_strtouq.c src/os_posix/os_dir.c src/os_posix/os_dlopen.c src/os_posix/os_errno.c -src/os_posix/os_exist.c src/os_posix/os_fallocate.c -src/os_posix/os_filesize.c -src/os_posix/os_flock.c -src/os_posix/os_fsync.c -src/os_posix/os_ftruncate.c +src/os_posix/os_fs.c src/os_posix/os_getenv.c -src/os_posix/os_getline.c -src/os_posix/os_getopt.c src/os_posix/os_map.c src/os_posix/os_mtx_cond.c -src/os_posix/os_mtx_rw.c src/os_posix/os_once.c -src/os_posix/os_open.c src/os_posix/os_pagesize.c src/os_posix/os_path.c src/os_posix/os_priv.c -src/os_posix/os_remove.c -src/os_posix/os_rename.c -src/os_posix/os_rw.c +src/os_posix/os_setvbuf.c src/os_posix/os_sleep.c -src/os_posix/os_stdio.c -src/os_posix/os_strtouq.c src/os_posix/os_thread.c src/os_posix/os_time.c src/os_posix/os_yield.c @@ -156,13 +152,13 @@ src/support/cksum.c src/support/cond_auto.c src/support/crypto.c src/support/err.c -src/support/filename.c src/support/global.c src/support/hash_city.c src/support/hash_fnv.c src/support/hazard.c src/support/hex.c src/support/huffman.c +src/support/mtx_rw.c src/support/pow.c src/support/power8/crc32.S src/support/power8/crc32_wrapper.c diff --git 
a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py index f500e3b1ae1..8f7827ad160 100644 --- a/src/third_party/wiredtiger/dist/flags.py +++ b/src/third_party/wiredtiger/dist/flags.py @@ -13,7 +13,7 @@ flags = { 'FILE_TYPE_DATA', 'FILE_TYPE_DIRECTORY', 'FILE_TYPE_LOG', - 'FILE_TYPE_TURTLE', + 'FILE_TYPE_REGULAR', ], 'log_scan' : [ 'LOGSCAN_FIRST', @@ -65,6 +65,7 @@ flags = { 'VERB_EVICT', 'VERB_EVICTSERVER', 'VERB_FILEOPS', + 'VERB_HANDLEOPS', 'VERB_LOG', 'VERB_LSM', 'VERB_LSM_MANAGER', diff --git a/src/third_party/wiredtiger/dist/log.py b/src/third_party/wiredtiger/dist/log.py index 6d35bf2e718..9201b20054b 100644 --- a/src/third_party/wiredtiger/dist/log.py +++ b/src/third_party/wiredtiger/dist/log.py @@ -89,7 +89,7 @@ def printf_line(f, optype, i, ishex): ifbegin = 'if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) {' + nl_indent if postcomma == '': precomma = ',\\n' - body = '%s%s(__wt_fprintf(out,' % ( + body = '%s%s(__wt_fprintf(session, WT_STDOUT(session),' % ( printf_setup(f, ishex, nl_indent), 'WT_ERR' if has_escape(optype.fields) else 'WT_RET') + \ '%s "%s \\"%s\\": \\"%s\\"%s",%s));' % ( @@ -292,16 +292,16 @@ __wt_logop_%(name)s_unpack( last_field = optype.fields[-1] tfile.write(''' int -__wt_logop_%(name)s_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, - FILE *out, uint32_t flags) +__wt_logop_%(name)s_print(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { %(arg_ret)s\t%(arg_decls)s \t%(arg_unused)s%(arg_init)sWT_RET(__wt_logop_%(name)s_unpack( \t session, pp, end%(arg_addrs)s)); -\tWT_RET(__wt_fprintf(out, " \\"optype\\": \\"%(name)s\\",\\n")); +\tWT_RET(__wt_fprintf(session, WT_STDOUT(session), +\t " \\"optype\\": \\"%(name)s\\",\\n")); \t%(print_args)s %(arg_fini)s } @@ -324,9 +324,8 @@ __wt_logop_%(name)s_print( # Emit the printlog entry point tfile.write(''' int -__wt_txn_op_printlog( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, - FILE *out, 
uint32_t flags) +__wt_txn_op_printlog(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { \tuint32_t optype, opsize; @@ -342,8 +341,7 @@ for optype in log_data.optypes: tfile.write(''' \tcase %(macro)s: -\t\tWT_RET(%(print_func)s(session, pp, end, out, -\t\t flags)); +\t\tWT_RET(%(print_func)s(session, pp, end, flags)); \t\tbreak; ''' % { 'macro' : optype.macro_name(), diff --git a/src/third_party/wiredtiger/dist/package/wiredtiger.spec b/src/third_party/wiredtiger/dist/package/wiredtiger.spec index 5bbb26885c0..365c330d86a 100644 --- a/src/third_party/wiredtiger/dist/package/wiredtiger.spec +++ b/src/third_party/wiredtiger/dist/package/wiredtiger.spec @@ -1,5 +1,5 @@ Name: wiredtiger -Version: 2.7.1 +Version: 2.8.1 Release: 1%{?dist} Summary: WiredTiger data storage engine diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list index e3f0dc7f181..c9777c86675 100644 --- a/src/third_party/wiredtiger/dist/s_define.list +++ b/src/third_party/wiredtiger/dist/s_define.list @@ -16,6 +16,7 @@ WIN32_LEAN_AND_MEAN WT_ATOMIC_CAS WT_ATOMIC_FUNC WT_BLOCK_DESC_SIZE +WT_BLOCK_HEADER_SIZE WT_CACHE_LINE_ALIGNMENT WT_COMPILER_TYPE_ALIGN WT_CONN_CHECK_PANIC diff --git a/src/third_party/wiredtiger/dist/s_funcs.list b/src/third_party/wiredtiger/dist/s_funcs.list index 8d32eecdfb7..c0d9f2e688f 100644 --- a/src/third_party/wiredtiger/dist/s_funcs.list +++ b/src/third_party/wiredtiger/dist/s_funcs.list @@ -22,7 +22,6 @@ __wt_debug_set_verbose __wt_debug_tree __wt_debug_tree_all __wt_debug_tree_shape -__wt_fsync __wt_lex_compare __wt_lex_compare_skip __wt_log_scan @@ -31,6 +30,7 @@ __wt_nlpo2_round __wt_print_huffman_code __wt_stat_join_aggregate __wt_stat_join_clear_all +__wt_stream_set_no_buffer __wt_try_readlock wiredtiger_config_parser_open wiredtiger_config_validate diff --git a/src/third_party/wiredtiger/dist/s_prototypes b/src/third_party/wiredtiger/dist/s_prototypes index 603c0f5633d..4ceb69f4c77 
100755 --- a/src/third_party/wiredtiger/dist/s_prototypes +++ b/src/third_party/wiredtiger/dist/s_prototypes @@ -4,13 +4,10 @@ t=__wt.$$ trap 'rm -f $t; exit 0' 0 1 2 3 13 15 -( -cat <<EOF -/* DO NOT EDIT: automatically built by dist/s_prototypes. */ - -EOF - -for i in `sed -e '/^[a-z]/!d' filelist`; do +# proto -- +# extract public functions. +proto() +{ sed -n \ -e '/^__wt_[a-z]/!{' \ -e h \ @@ -32,9 +29,34 @@ for i in `sed -e '/^[a-z]/!d' filelist`; do -e 's/ */ /g' \ -e 's/^/extern /' \ -e 's/WT_GCC_FUNC_/WT_GCC_FUNC_DECL_/' \ - -e 's/$/;/p' \ - < ../$i -done) > $t + -e 's/$/;/p' < $1 +} + +( +cat <<EOF +/* DO NOT EDIT: automatically built by dist/s_prototypes. */ + +EOF + +# First, get prototypes for everything but the OS directories. +# Second, get prototypes for the OS directories. +# The reason for this is because the OS directories repeat names (that is, there +# are common names in both os_posix and os_win), and so we sort the prototypes +# to avoid repeating them in the output (which some compilers won't tolerate). +# We'd sort everything and discard duplicates, but we can't sort when function +# signatures are on multiple lines, that is, #ifdef'd function signatures. Since +# the OS directories are the only places with repeated names, and they have no +# #ifdef'd signatures, we do it this way. +l=`sed -e '/^[a-z]/!d' -e '/src\/os/d' filelist` +for i in $l; do + proto ../$i +done +l=`echo ../src\/os*/*.c` + +for i in $l; do + proto $i +done | tee xxx | env LC_ALL=C sort -u +) > $t f=../src/include/extern.h cmp $t $f > /dev/null 2>&1 || diff --git a/src/third_party/wiredtiger/dist/s_stat b/src/third_party/wiredtiger/dist/s_stat index 44c22ab56bb..3938b8e65eb 100755 --- a/src/third_party/wiredtiger/dist/s_stat +++ b/src/third_party/wiredtiger/dist/s_stat @@ -11,7 +11,7 @@ l=`sed \ -e 's,#.*,,' \ -e '/^$/d' \ -e 's,^,../,' filelist` -l="$l `echo ../src/include/*.i`" +l="$l `echo ../src/include/*.i ../src/include/os.h`" ( # Get the list of statistics fields. 
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 6762521ca76..eed034abb47 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -67,7 +67,9 @@ CloseHandle Comparator Config Coverity +CreateFileA CreateFileMapping +CreateFileMappingA Crummey CustomersPhone DECL @@ -76,11 +78,13 @@ DESC DHANDLE DNE DOI +DONTNEED DUPLICATEV DbCursor DbEnv Decrement Decrypt +DeleteFileA EAGAIN EBUSY EEXIST @@ -187,6 +191,7 @@ LoadLoad LockFile Lookaside Lookup +MADV MALLOC MEM MEMALIGN @@ -242,12 +247,14 @@ Pandis Phong PlatformSDK Posix +PowerPC Pre Preload Prepend Qsort RCS RDNOLOCK +RDONLY RECNO REF's REFs @@ -260,6 +267,7 @@ RNG RPC RUNDIR Radu +ReadFile Readonly Rebalance RedHat @@ -317,6 +325,7 @@ UTF UltraSparc Unbuffered UnixLib +UnlockFile Unmap UnmapViewOfFile Unmarshall @@ -332,9 +341,11 @@ Vixie Vo VxWorks WAL +WILLNEED WIREDTIGER WRLSN WRNOLOCK +WaitForSingleObject WakeAllConditionVariable Wconditional WeakHashLen @@ -352,6 +363,7 @@ WiredTigerPreplog WiredTigerTmplog WiredTigerTxn WithSeeds +WriteFile Wuninitialized Wunused XP @@ -454,6 +466,7 @@ ckpt ckptfrag ckptlist cksum +cloexec clsm cmd cmp @@ -595,12 +608,17 @@ fallocate fblocks fclose fcntl +fd +fdatasync +fdopen ffc fflush ffs +fgetc fgetln fh filefrag +filehandle fileid filename filenames @@ -631,7 +649,9 @@ func gcc gdb ge +getc getenv +getlasterror getline getone getones @@ -647,6 +667,7 @@ gostring gostruct goutf gt +handleops hashval havesize hdr @@ -663,6 +684,7 @@ icount idx ifdef's ikey +im impl incase incr @@ -679,6 +701,7 @@ initsize initval inline inmem +inmemory insertK insertV inserters @@ -686,6 +709,7 @@ instantiation intl intnum intpack +intptr intrin inuse io @@ -838,6 +862,7 @@ optimizations optype ori os +osfhandle ovfl ownp packv @@ -863,8 +888,10 @@ postsize powerpc pragmas pre +pread prealloc preload +preloaded prepend prepended prepending @@ -883,6 +910,7 @@ pushms putK putV 
pv +pwrite py qdown qrrSS @@ -941,6 +969,7 @@ sessionp setkv setstr setv +setvbuf sfence sii sizeof @@ -1035,6 +1064,7 @@ undef unencrypted unesc unescaped +unicode uninstantiated unistd unlinked diff --git a/src/third_party/wiredtiger/dist/s_style b/src/third_party/wiredtiger/dist/s_style index 78fb7a6eb03..a163eb83b25 100755 --- a/src/third_party/wiredtiger/dist/s_style +++ b/src/third_party/wiredtiger/dist/s_style @@ -60,11 +60,12 @@ else echo "$f: use TAILQ for all lists" fi - if ! expr "$f" : 'src/os_posix/.*' > /dev/null && + if ! expr "$f" : 'src/os_common/.*' > /dev/null && + ! expr "$f" : 'src/os_posix/.*' > /dev/null && ! expr "$f" : 'src/os_win/.*' > /dev/null && ! expr "$f" : 'src/include/extern.h' > /dev/null && ! expr "$f" : 'src/include/os.h' > /dev/null && - grep '__wt_errno' $f > $t; then + grep '__wt_errno' $f > $t; then echo "$f: upper-level code should not call __wt_errno" cat $t fi @@ -83,6 +84,12 @@ else cat $t } + if ! expr "$f" : 'src/.*/os_setvbuf.c' > /dev/null && + egrep -w 'setvbuf' $f > $t; then + echo "$f: setvbuf call, use WiredTiger library replacements" + cat $t + fi + # Alignment directive before "struct". egrep 'WT_COMPILER_TYPE_ALIGN.*struct' $f > $t test -s $t && { diff --git a/src/third_party/wiredtiger/dist/s_win b/src/third_party/wiredtiger/dist/s_win index 0b7d5184037..562e89f94c6 100755 --- a/src/third_party/wiredtiger/dist/s_win +++ b/src/third_party/wiredtiger/dist/s_win @@ -43,40 +43,33 @@ win_filelist() { f='../build_win/filelist.win' - # Process the files for which there's a Windows-specific version, then - # append Windows-only files and discard POSIX-only files. 
- (sed \ - -e 's;os_posix/os_dir.c;os_win/os_dir.c;' \ - -e 's;os_posix/os_dlopen.c;os_win/os_dlopen.c;' \ - -e 's;os_posix/os_dlopen.c;os_win/os_dlopen.c;' \ - -e 's;os_posix/os_dlopen.c;os_win/os_dlopen.c;' \ - -e 's;os_posix/os_errno.c;os_win/os_errno.c;' \ - -e 's;os_posix/os_exist.c;os_win/os_exist.c;' \ - -e 's;os_posix/os_fallocate.c;os_win/os_fallocate.c;' \ - -e 's;os_posix/os_filesize.c;os_win/os_filesize.c;' \ - -e 's;os_posix/os_flock.c;os_win/os_flock.c;' \ - -e 's;os_posix/os_fsync.c;os_win/os_fsync.c;' \ - -e 's;os_posix/os_ftruncate.c;os_win/os_ftruncate.c;' \ - -e 's;os_posix/os_getenv.c;os_win/os_getenv.c;' \ - -e 's;os_posix/os_map.c;os_win/os_map.c;' \ - -e 's;os_posix/os_mtx_cond.c;os_win/os_mtx_cond.c;' \ - -e 's;os_posix/os_once.c;os_win/os_once.c;' \ - -e 's;os_posix/os_open.c;os_win/os_open.c;' \ - -e 's;os_posix/os_pagesize.c;os_win/os_pagesize.c;' \ - -e 's;os_posix/os_path.c;os_win/os_path.c;' \ - -e 's;os_posix/os_priv.c;os_win/os_priv.c;' \ - -e 's;os_posix/os_remove.c;os_win/os_remove.c;' \ - -e 's;os_posix/os_rename.c;os_win/os_rename.c;' \ - -e 's;os_posix/os_rw.c;os_win/os_rw.c;' \ - -e 's;os_posix/os_sleep.c;os_win/os_sleep.c;' \ - -e 's;os_posix/os_thread.c;os_win/os_thread.c;' \ - -e 's;os_posix/os_time.c;os_win/os_time.c;' \ - -e 's;os_posix/os_yield.c;os_win/os_yield.c;' \ + # Discard POSIX-only and PPC-only files, add in Windows-only files. 
+ ( + sed \ + -e '/\/os_posix\//d' \ -e '/src\/support\/power8\/crc32.S/d' \ -e '/src\/support\/power8\/crc32_wrapper.c/d' - echo 'src/os_win/os_snprintf.c' - echo 'src/os_win/os_vsnprintf.c') < filelist | sort > $t - cmp $t $f > /dev/null 2>&1 || + + echo 'src/os_win/os_dir.c' + echo 'src/os_win/os_dlopen.c' + echo 'src/os_win/os_errno.c' + echo 'src/os_win/os_fs.c' + echo 'src/os_win/os_getenv.c' + echo 'src/os_win/os_map.c' + echo 'src/os_win/os_mtx_cond.c' + echo 'src/os_win/os_once.c' + echo 'src/os_win/os_pagesize.c' + echo 'src/os_win/os_path.c' + echo 'src/os_win/os_priv.c' + echo 'src/os_win/os_setvbuf.c' + echo 'src/os_win/os_sleep.c' + echo 'src/os_win/os_snprintf.c' + echo 'src/os_win/os_thread.c' + echo 'src/os_win/os_time.c' + echo 'src/os_win/os_vsnprintf.c' + echo 'src/os_win/os_yield.c') < filelist | sort > $t + + cmp $t $f > /dev/null 2>&1 || (echo "Building $f" && rm -f $f && cp $t $f) } diff --git a/src/third_party/wiredtiger/examples/c/ex_event_handler.c b/src/third_party/wiredtiger/examples/c/ex_event_handler.c index ba6807cd56d..d1e08edb04d 100644 --- a/src/third_party/wiredtiger/examples/c/ex_event_handler.c +++ b/src/third_party/wiredtiger/examples/c/ex_event_handler.c @@ -90,7 +90,7 @@ handle_wiredtiger_message( /*! [Function event_handler] */ static int -config_event_handler() +config_event_handler(void) { WT_CONNECTION *conn; WT_SESSION *session; diff --git a/src/third_party/wiredtiger/src/block/block_addr.c b/src/third_party/wiredtiger/src/block/block_addr.c index b1f2fd9454a..d8cc1d627cf 100644 --- a/src/third_party/wiredtiger/src/block/block_addr.c +++ b/src/third_party/wiredtiger/src/block/block_addr.c @@ -112,7 +112,7 @@ __wt_block_addr_invalid(WT_SESSION_IMPL *session, #endif /* Check if the address is past the end of the file. */ - return (offset + size > block->fh->size ? EINVAL : 0); + return (offset + size > block->size ? 
EINVAL : 0); } /* diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c index 812bf99acfb..a861a21876b 100644 --- a/src/third_party/wiredtiger/src/block/block_ckpt.c +++ b/src/third_party/wiredtiger/src/block/block_ckpt.c @@ -135,8 +135,11 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, * that was done when the checkpoint was first written (re-writing the * checkpoint might possibly make it relevant here, but it's unlikely * enough I don't bother). + * + * If in-memory, we don't read or write the object, and the truncate + * will unnecessarily allocate buffer space. */ - if (!checkpoint) { + if (!checkpoint && !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) { /* * The truncate might fail if there's a file mapping (if there's * an open checkpoint on the file), that's OK. @@ -144,7 +147,7 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ERR(__wt_verbose(session, WT_VERB_CHECKPOINT, "truncate file to %" PRIuMAX, (uintmax_t)ci->file_size)); WT_ERR_BUSY_OK( - __wt_block_truncate(session, block->fh, ci->file_size)); + __wt_block_truncate(session, block, ci->file_size)); } if (0) { @@ -192,7 +195,7 @@ __wt_block_checkpoint_unload( * an open checkpoint on the file), that's OK. */ WT_TRET_BUSY_OK( - __wt_block_truncate(session, block->fh, block->fh->size)); + __wt_block_truncate(session, block, block->size)); __wt_spin_lock(session, &block->live_lock); __wt_block_ckpt_destroy(session, &block->live); @@ -738,7 +741,7 @@ __ckpt_update(WT_SESSION_IMPL *session, * if there ever is, this will need to be fixed. 
*/ if (is_live) - ci->file_size = block->fh->size; + ci->file_size = block->size; /* * Copy the checkpoint information into the checkpoint array's address diff --git a/src/third_party/wiredtiger/src/block/block_compact.c b/src/third_party/wiredtiger/src/block/block_compact.c index 8c9be4f029c..24ca6632311 100644 --- a/src/third_party/wiredtiger/src/block/block_compact.c +++ b/src/third_party/wiredtiger/src/block/block_compact.c @@ -59,20 +59,17 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) WT_DECL_RET; WT_EXT *ext; WT_EXTLIST *el; - WT_FH *fh; wt_off_t avail_eighty, avail_ninety, eighty, ninety; *skipp = true; /* Return a default skip. */ - fh = block->fh; - /* * We do compaction by copying blocks from the end of the file to the * beginning of the file, and we need some metrics to decide if it's * worth doing. Ignore small files, and files where we are unlikely * to recover 10% of the file. */ - if (fh->size <= WT_MEGABYTE) + if (block->size <= WT_MEGABYTE) return (0); /* @@ -93,8 +90,8 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) /* Sum the available bytes in the initial 80% and 90% of the file. */ avail_eighty = avail_ninety = 0; - ninety = fh->size - fh->size / 10; - eighty = fh->size - ((fh->size / 10) * 2); + ninety = block->size - block->size / 10; + eighty = block->size - ((block->size / 10) * 2); el = &block->live.avail; WT_EXT_FOREACH(ext, el->off) @@ -117,11 +114,11 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) * less useful. 
*/ if (avail_eighty > WT_MEGABYTE && - avail_eighty >= ((fh->size / 10) * 2)) { + avail_eighty >= ((block->size / 10) * 2)) { *skipp = false; block->compact_pct_tenths = 2; } else if (avail_ninety > WT_MEGABYTE && - avail_ninety >= fh->size / 10) { + avail_ninety >= block->size / 10) { *skipp = false; block->compact_pct_tenths = 1; } @@ -140,7 +137,8 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) "%s: require 10%% or %" PRIuMAX "MB (%" PRIuMAX ") in the first " "90%% of the file to perform compaction, compaction %s", block->name, - (uintmax_t)(fh->size / 10) / WT_MEGABYTE, (uintmax_t)fh->size / 10, + (uintmax_t)(block->size / 10) / WT_MEGABYTE, + (uintmax_t)block->size / 10, *skipp ? "skipped" : "proceeding")); err: __wt_spin_unlock(session, &block->live_lock); @@ -159,15 +157,12 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session, WT_DECL_RET; WT_EXT *ext; WT_EXTLIST *el; - WT_FH *fh; wt_off_t limit, offset; uint32_t size, cksum; WT_UNUSED(addr_size); *skipp = true; /* Return a default skip. */ - fh = block->fh; - /* Crack the cookie. */ WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum)); @@ -179,7 +174,7 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session, * there's an obvious race if the file is sufficiently busy. 
*/ __wt_spin_lock(session, &block->live_lock); - limit = fh->size - ((fh->size / 10) * block->compact_pct_tenths); + limit = block->size - ((block->size / 10) * block->compact_pct_tenths); if (offset > limit) { el = &block->live.avail; WT_EXT_FOREACH(ext, el->off) { @@ -217,7 +212,7 @@ __block_dump_avail(WT_SESSION_IMPL *session, WT_BLOCK *block, bool start) u_int i; el = &block->live.avail; - size = block->fh->size; + size = block->size; WT_RET(__wt_verbose(session, WT_VERB_COMPACT, "============ %s", diff --git a/src/third_party/wiredtiger/src/block/block_ext.c b/src/third_party/wiredtiger/src/block/block_ext.c index ab5d5604087..caafcc77c48 100644 --- a/src/third_party/wiredtiger/src/block/block_ext.c +++ b/src/third_party/wiredtiger/src/block/block_ext.c @@ -462,17 +462,13 @@ static inline int __block_extend( WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t *offp, wt_off_t size) { - WT_FH *fh; - - fh = block->fh; - /* * Callers of this function are expected to have already acquired any * locks required to extend the file. * * We should never be allocating from an empty file. */ - if (fh->size < block->allocsize) + if (block->size < block->allocsize) WT_RET_MSG(session, EINVAL, "file has no description information"); @@ -482,12 +478,12 @@ __block_extend( * 8B bits (we currently check an wt_off_t is 8B in verify_build.h). I * don't think we're likely to see anything bigger for awhile. 
*/ - if (fh->size > (wt_off_t)INT64_MAX - size) + if (block->size > (wt_off_t)INT64_MAX - size) WT_RET_MSG(session, WT_ERROR, "block allocation failed, file cannot grow further"); - *offp = fh->size; - fh->size += size; + *offp = block->size; + block->size += size; WT_STAT_FAST_DATA_INCR(session, block_extension); WT_RET(__wt_verbose(session, WT_VERB_BLOCK, @@ -1343,19 +1339,16 @@ __wt_block_extlist_truncate( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_EXTLIST *el) { WT_EXT *ext, **astack[WT_SKIP_MAXDEPTH]; - WT_FH *fh; wt_off_t orig, size; - fh = block->fh; - /* * Check if the last available extent is at the end of the file, and if * so, truncate the file and discard the extent. */ if ((ext = __block_off_srch_last(el->off, astack)) == NULL) return (0); - WT_ASSERT(session, ext->off + ext->size <= fh->size); - if (ext->off + ext->size < fh->size) + WT_ASSERT(session, ext->off + ext->size <= block->size); + if (ext->off + ext->size < block->size) return (0); /* @@ -1363,10 +1356,10 @@ __wt_block_extlist_truncate( * the cached file size, and that can't happen until after the extent * list removal succeeds.) */ - orig = fh->size; + orig = block->size; size = ext->off; WT_RET(__block_off_remove(session, block, el, size, NULL)); - fh->size = size; + block->size = size; /* * Truncate the file. 
The truncate might fail if there's a file mapping @@ -1376,7 +1369,7 @@ __wt_block_extlist_truncate( WT_RET(__wt_verbose(session, WT_VERB_BLOCK, "truncate file from %" PRIdMAX " to %" PRIdMAX, (intmax_t)orig, (intmax_t)size)); - WT_RET_BUSY_OK(__wt_block_truncate(session, block->fh, size)); + WT_RET_BUSY_OK(__wt_block_truncate(session, block, size)); return (0); } diff --git a/src/third_party/wiredtiger/src/block/block_map.c b/src/third_party/wiredtiger/src/block/block_map.c index b60623a37d8..b16fe7f8423 100644 --- a/src/third_party/wiredtiger/src/block/block_map.c +++ b/src/third_party/wiredtiger/src/block/block_map.c @@ -17,6 +17,8 @@ __wt_block_map( WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapp, size_t *maplenp, void **mappingcookie) { + WT_DECL_RET; + *(void **)mapp = NULL; *maplenp = 0; @@ -42,14 +44,6 @@ __wt_block_map( return (0); /* - * Turn off mapping when direct I/O is configured for the file, the - * Linux open(2) documentation says applications should avoid mixing - * mmap(2) of files with direct I/O to the same files. - */ - if (block->fh->direct_io) - return (0); - - /* * Turn off mapping if the application configured a cache size maximum, * we can't control how much of the cache size we use in that case. */ @@ -58,12 +52,16 @@ __wt_block_map( /* * Map the file into memory. - * Ignore errors, we'll read the file through the cache if map fails. + * Ignore not-supported errors, we'll read the file through the cache + * if map fails. */ - (void)__wt_mmap(session, block->fh, mapp, maplenp, mappingcookie); + ret = block->fh->fh_map( + session, block->fh, mapp, maplenp, mappingcookie); + if (ret == ENOTSUP) + ret = 0; #endif - return (0); + return (ret); } /* @@ -76,5 +74,6 @@ __wt_block_unmap( void **mappingcookie) { /* Unmap the file from memory. 
*/ - return (__wt_munmap(session, block->fh, map, maplen, mappingcookie)); + return (block->fh->fh_map_unmap( + session, block->fh, map, maplen, mappingcookie)); } diff --git a/src/third_party/wiredtiger/src/block/block_mgr.c b/src/third_party/wiredtiger/src/block/block_mgr.c index 0bb75d129e1..06150a0f062 100644 --- a/src/third_party/wiredtiger/src/block/block_mgr.c +++ b/src/third_party/wiredtiger/src/block/block_mgr.c @@ -411,11 +411,21 @@ __bm_stat(WT_BM *bm, WT_SESSION_IMPL *session, WT_DSRC_STATS *stats) * Flush a file to disk. */ static int -__bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool async) +__bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool block) { - return (async ? - __wt_fsync_async(session, bm->block->fh) : - __wt_fsync(session, bm->block->fh)); + WT_DECL_RET; + + if (!block && !bm->block->nowait_sync_available) + return (0); + + if ((ret = __wt_fsync(session, bm->block->fh, block)) == 0) + return (0); + + /* Ignore ENOTSUP, but don't try again. */ + if (ret != ENOTSUP) + return (ret); + bm->block->nowait_sync_available = false; + return (0); } /* diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c index adb745c99e7..f4da5ca7c05 100644 --- a/src/third_party/wiredtiger/src/block/block_open.c +++ b/src/third_party/wiredtiger/src/block/block_open.c @@ -44,8 +44,8 @@ __wt_block_manager_create( * in our space. Move any existing files out of the way and complain. */ for (;;) { - if ((ret = __wt_open(session, - filename, true, true, WT_FILE_TYPE_DATA, &fh)) == 0) + if ((ret = __wt_open(session, filename, WT_FILE_TYPE_DATA, + WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &fh)) == 0) break; WT_ERR_TEST(ret != EEXIST, ret); @@ -67,13 +67,13 @@ __wt_block_manager_create( } /* Write out the file's meta-data. */ - ret = __wt_desc_init(session, fh, allocsize); + ret = __wt_desc_write(session, fh, allocsize); /* * Ensure the truncated file has made it to disk, then the upper-level * is never surprised. 
*/ - WT_TRET(__wt_fsync(session, fh)); + WT_TRET(__wt_fsync(session, fh, true)); /* Close the file handle. */ WT_TRET(__wt_close(session, &fh)); @@ -157,6 +157,8 @@ __wt_block_open(WT_SESSION_IMPL *session, WT_DECL_RET; uint64_t bucket, hash; + WT_UNUSED(readonly); + WT_RET(__wt_verbose(session, WT_VERB_BLOCK, "open: %s", filename)); conn = S2C(session); @@ -194,41 +196,23 @@ __wt_block_open(WT_SESSION_IMPL *session, /* Configuration: optional OS buffer cache maximum size. */ WT_ERR(__wt_config_gets(session, cfg, "os_cache_max", &cval)); block->os_cache_max = (size_t)cval.val; -#ifdef HAVE_POSIX_FADVISE - if (conn->direct_io && block->os_cache_max) - WT_ERR_MSG(session, EINVAL, - "os_cache_max not supported in combination with direct_io"); -#else - if (block->os_cache_max) - WT_ERR_MSG(session, EINVAL, - "os_cache_max not supported if posix_fadvise not " - "available"); -#endif /* Configuration: optional immediate write scheduling flag. */ WT_ERR(__wt_config_gets(session, cfg, "os_cache_dirty_max", &cval)); block->os_cache_dirty_max = (size_t)cval.val; -#ifdef HAVE_SYNC_FILE_RANGE - if (conn->direct_io && block->os_cache_dirty_max) - WT_ERR_MSG(session, EINVAL, - "os_cache_dirty_max not supported in combination with " - "direct_io"); -#else - if (block->os_cache_dirty_max) { - /* - * Ignore any setting if it is not supported. - */ - block->os_cache_dirty_max = 0; - WT_ERR(__wt_verbose(session, WT_VERB_BLOCK, - "os_cache_dirty_max ignored when sync_file_range not " - "available")); - } -#endif + + /* Set the file extension information. */ + block->extend_len = conn->data_extend_len; + + /* Set the asynchronous flush, preload availability. */ + block->nowait_sync_available = true; + block->preload_available = true; /* Open the underlying file handle. */ - WT_ERR(__wt_open(session, filename, false, false, - readonly ? 
WT_FILE_TYPE_CHECKPOINT : WT_FILE_TYPE_DATA, - &block->fh)); + WT_ERR(__wt_open(session, filename, WT_FILE_TYPE_DATA, 0, &block->fh)); + + /* Set the file's size. */ + WT_ERR(__wt_filesize(session, block->fh, &block->size)); /* Initialize the live checkpoint's lock. */ WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager")); @@ -282,16 +266,20 @@ __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block) } /* - * __wt_desc_init -- + * __wt_desc_write -- * Write a file's initial descriptor structure. */ int -__wt_desc_init(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize) +__wt_desc_write(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize) { WT_BLOCK_DESC *desc; WT_DECL_ITEM(buf); WT_DECL_RET; + /* If in-memory, we don't read or write the descriptor structure. */ + if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) + return (0); + /* Use a scratch buffer to get correct alignment for direct I/O. */ WT_RET(__wt_scr_alloc(session, allocsize, &buf)); memset(buf->mem, 0, allocsize); @@ -329,6 +317,10 @@ __desc_read(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_DECL_RET; uint32_t cksum_calculate, cksum_tmp; + /* If in-memory, we don't read or write the descriptor structure. */ + if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) + return (0); + /* Use a scratch buffer to get correct alignment for direct I/O. */ WT_RET(__wt_scr_alloc(session, block->allocsize, &buf)); @@ -406,7 +398,7 @@ __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats) WT_STAT_WRITE(stats, block_minor, WT_BLOCK_MINOR_VERSION); WT_STAT_WRITE( stats, block_reuse_bytes, (int64_t)block->live.avail.bytes); - WT_STAT_WRITE(stats, block_size, block->fh->size); + WT_STAT_WRITE(stats, block_size, block->size); } /* @@ -418,7 +410,7 @@ __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep) { WT_UNUSED(session); - *sizep = bm->block->fh == NULL ? 
0 : bm->block->fh->size; + *sizep = bm->block->size; return (0); } diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c index 6e74d7a7793..6f0c41c1b5c 100644 --- a/src/third_party/wiredtiger/src/block/block_read.c +++ b/src/third_party/wiredtiger/src/block/block_read.c @@ -26,33 +26,41 @@ __wt_bm_preload( WT_UNUSED(addr_size); block = bm->block; - /* - * Turn off pre-load when direct I/O is configured for the file, - * the kernel cache isn't interesting. - */ - if (block->fh->direct_io) - return (0); - WT_STAT_FAST_CONN_INCR(session, block_preload); - /* Crack the cookie. */ - WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum)); - - /* Check for a mapped block. */ - mapped = bm->map != NULL && offset + size <= (wt_off_t)bm->maplen; - if (mapped) - return (__wt_mmap_preload( - session, (uint8_t *)bm->map + offset, size)); + /* Preload the block. */ + if (block->preload_available) { + /* Crack the cookie. */ + WT_RET(__wt_block_buffer_to_addr( + block, addr, &offset, &size, &cksum)); + + mapped = bm->map != NULL && + offset + size <= (wt_off_t)bm->maplen; + if (mapped) + ret = block->fh->fh_map_preload(session, + block->fh, (uint8_t *)bm->map + offset, size); + else + ret = block->fh->fh_advise(session, + block->fh, (wt_off_t)offset, + (wt_off_t)size, POSIX_FADV_WILLNEED); + if (ret == 0) + return (0); -#ifdef HAVE_POSIX_FADVISE - if (posix_fadvise(block->fh->fd, - (wt_off_t)offset, (wt_off_t)size, POSIX_FADV_WILLNEED) == 0) - return (0); -#endif + /* Ignore ENOTSUP, but don't try again. */ + if (ret != ENOTSUP) + return (ret); + block->preload_available = false; + } - WT_RET(__wt_scr_alloc(session, size, &tmp)); - ret = __wt_block_read_off(session, block, tmp, offset, size, cksum); + /* + * If preload isn't supported, do it the slow way; don't call the + * underlying read routine directly, we don't know for certain if + * this is a mapped range. 
+ */ + WT_RET(__wt_scr_alloc(session, 0, &tmp)); + ret = __wt_bm_read(bm, session, tmp, addr, addr_size); __wt_scr_free(session, &tmp); + return (ret); } @@ -65,6 +73,7 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) { WT_BLOCK *block; + WT_DECL_RET; wt_off_t offset; uint32_t cksum, size; bool mapped; @@ -82,7 +91,15 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, if (mapped) { buf->data = (uint8_t *)bm->map + offset; buf->size = size; - WT_RET(__wt_mmap_preload(session, buf->data, buf->size)); + if (block->preload_available) { + ret = block->fh->fh_map_preload( + session, block->fh, buf->data, buf->size); + + /* Ignore ENOTSUP, but don't try again. */ + if (ret != ENOTSUP) + return (ret); + block->preload_available = false; + } WT_STAT_FAST_CONN_INCR(session, block_map_read); WT_STAT_FAST_CONN_INCRV(session, block_byte_map_read, size); @@ -100,21 +117,9 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, /* Read the block. */ WT_RET(__wt_block_read_off(session, block, buf, offset, size, cksum)); -#ifdef HAVE_POSIX_FADVISE /* Optionally discard blocks from the system's buffer cache. */ - if (block->os_cache_max != 0 && - (block->os_cache += size) > block->os_cache_max) { - WT_DECL_RET; - - block->os_cache = 0; - /* Ignore EINVAL - some file systems don't support the flag. 
*/ - if ((ret = posix_fadvise(block->fh->fd, - (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED)) != 0 && - ret != EINVAL) - WT_RET_MSG( - session, ret, "%s: posix_fadvise", block->name); - } -#endif + WT_RET(__wt_block_discard(session, block, (size_t)size)); + return (0); } diff --git a/src/third_party/wiredtiger/src/block/block_slvg.c b/src/third_party/wiredtiger/src/block/block_slvg.c index a8cccd53023..6be3fa73f70 100644 --- a/src/third_party/wiredtiger/src/block/block_slvg.c +++ b/src/third_party/wiredtiger/src/block/block_slvg.c @@ -21,7 +21,7 @@ __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block) allocsize = block->allocsize; /* Reset the description information in the first block. */ - WT_RET(__wt_desc_init(session, block->fh, allocsize)); + WT_RET(__wt_desc_write(session, block->fh, allocsize)); /* * Salvage creates a new checkpoint when it's finished, set up for @@ -33,10 +33,10 @@ __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block) * Truncate the file to an allocation-size multiple of blocks (bytes * trailing the last block must be garbage, by definition). */ - if (block->fh->size > allocsize) { - len = (block->fh->size / allocsize) * allocsize; - if (len != block->fh->size) - WT_RET(__wt_block_truncate(session, block->fh, len)); + if (block->size > allocsize) { + len = (block->size / allocsize) * allocsize; + if (len != block->size) + WT_RET(__wt_block_truncate(session, block, len)); } else len = allocsize; block->live.file_size = len; @@ -83,7 +83,7 @@ __wt_block_offset_invalid(WT_BLOCK *block, wt_off_t offset, uint32_t size) if (size > WT_BTREE_PAGE_SIZE_MAX) /* > maximum page size */ return (true); /* past end-of-file */ - if (offset + (wt_off_t)size > block->fh->size) + if (offset + (wt_off_t)size > block->size) return (true); return (false); } @@ -111,7 +111,7 @@ __wt_block_salvage_next(WT_SESSION_IMPL *session, WT_ERR(__wt_scr_alloc(session, allocsize, &tmp)); /* Read through the file, looking for pages. 
*/ - for (max = fh->size;;) { + for (max = block->size;;) { offset = block->slvg_off; if (offset >= max) { /* Check eof. */ *eofp = 1; diff --git a/src/third_party/wiredtiger/src/block/block_vrfy.c b/src/third_party/wiredtiger/src/block/block_vrfy.c index 35c7a2c218c..6570184ca10 100644 --- a/src/third_party/wiredtiger/src/block/block_vrfy.c +++ b/src/third_party/wiredtiger/src/block/block_vrfy.c @@ -57,7 +57,7 @@ __wt_block_verify_start(WT_SESSION_IMPL *session, * a file immediately after creation or the checkpoint doesn't reflect * any of the data pages). */ - size = block->fh->size; + size = block->size; if (size <= block->allocsize) return (0); @@ -156,7 +156,7 @@ __verify_last_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckpt) ci = &_ci; WT_RET(__wt_block_ckpt_init(session, ci, ckpt->name)); WT_ERR(__wt_block_buffer_to_ckpt(session, block, ckpt->raw.data, ci)); - WT_ERR(__wt_block_truncate(session, block->fh, ci->file_size)); + WT_ERR_BUSY_OK(__wt_block_truncate(session, block, ci->file_size)); err: __wt_block_ckpt_destroy(session, ci); return (ret); @@ -368,7 +368,7 @@ __verify_filefrag_add(WT_SESSION_IMPL *session, WT_BLOCK *block, (uintmax_t)offset, (uintmax_t)(offset + size), (uintmax_t)size)); /* Check each chunk against the total file size. */ - if (offset + size > block->fh->size) + if (offset + size > block->size) WT_RET_MSG(session, WT_ERROR, "fragment %" PRIuMAX "-%" PRIuMAX " references " "non-existent file blocks", diff --git a/src/third_party/wiredtiger/src/block/block_write.c b/src/third_party/wiredtiger/src/block/block_write.c index e05a430832e..134272b52f9 100644 --- a/src/third_party/wiredtiger/src/block/block_write.c +++ b/src/third_party/wiredtiger/src/block/block_write.c @@ -9,28 +9,47 @@ #include "wt_internal.h" /* - * __wt_block_header -- - * Return the size of the block-specific header. + * __wt_block_truncate -- + * Truncate the file. 
*/ -u_int -__wt_block_header(WT_BLOCK *block) +int +__wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len) { - WT_UNUSED(block); + WT_RET(__wt_ftruncate(session, block->fh, len)); - return ((u_int)WT_BLOCK_HEADER_SIZE); + block->size = block->extend_size = len; + + return (0); } /* - * __wt_block_truncate -- - * Truncate the file. + * __wt_block_discard -- + * Discard blocks from the system buffer cache. */ int -__wt_block_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +__wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size) { - WT_RET(__wt_ftruncate(session, fh, len)); + WT_DECL_RET; - fh->size = fh->extend_size = len; + if (block->os_cache_max == 0) + return (0); + /* + * We're racing on the addition, but I'm not willing to serialize on it + * in the standard read path with more evidence it's needed. + */ + if ((block->os_cache += added_size) <= block->os_cache_max) + return (0); + + block->os_cache = 0; + WT_ERR(block->fh->fh_advise(session, + block->fh, (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED)); + return (0); + +err: /* Ignore ENOTSUP, but don't try again. */ + if (ret != ENOTSUP) + return (ret); + block->os_cache_max = 0; return (0); } @@ -61,7 +80,7 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, locked = true; /* If not configured to extend the file, we're done. */ - if (fh->extend_len == 0) + if (block->extend_len == 0) return (0); /* @@ -73,9 +92,9 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, * why there's a check in case the extended file size becomes too small: * if the file size catches up, every thread tries to extend it. 
*/ - if (fh->extend_size > fh->size && - (offset > fh->extend_size || - offset + fh->extend_len + (wt_off_t)align_size < fh->extend_size)) + if (block->extend_size > block->size && + (offset > block->extend_size || offset + + block->extend_len + (wt_off_t)align_size < block->extend_size)) return (0); /* @@ -108,9 +127,9 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, * and that's OK, we simply may do another extension sooner than * otherwise. */ - fh->extend_size = fh->size + fh->extend_len * 2; + block->extend_size = block->size + block->extend_len * 2; if ((ret = __wt_fallocate( - session, fh, fh->size, fh->extend_len * 2)) == 0) + session, fh, block->size, block->extend_len * 2)) == 0) return (0); if (ret != ENOTSUP) return (ret); @@ -130,13 +149,13 @@ __wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block, * extend length after locking so we don't overwrite already-written * blocks. */ - fh->extend_size = fh->size + fh->extend_len * 2; + block->extend_size = block->size + block->extend_len * 2; /* * The truncate might fail if there's a mapped file (in other words, if * there's an open checkpoint on the file), that's OK. */ - if ((ret = __wt_ftruncate(session, fh, fh->extend_size)) == EBUSY) + if ((ret = __wt_ftruncate(session, fh, block->extend_size)) == EBUSY) ret = 0; return (ret); } @@ -318,7 +337,6 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_RET(ret); } -#ifdef HAVE_SYNC_FILE_RANGE /* * Optionally schedule writes for dirty pages in the system buffer * cache, but only if the current session can wait. @@ -327,20 +345,19 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, (block->os_cache_dirty += align_size) > block->os_cache_dirty_max && __wt_session_can_wait(session)) { block->os_cache_dirty = 0; - WT_RET(__wt_fsync_async(session, fh)); - } -#endif -#ifdef HAVE_POSIX_FADVISE - /* Optionally discard blocks from the system buffer cache. 
*/ - if (block->os_cache_max != 0 && - (block->os_cache += align_size) > block->os_cache_max) { - block->os_cache = 0; - if ((ret = posix_fadvise(fh->fd, - (wt_off_t)0, (wt_off_t)0, POSIX_FADV_DONTNEED)) != 0) - WT_RET_MSG( - session, ret, "%s: posix_fadvise", block->name); + if ((ret = __wt_fsync(session, fh, false)) != 0) { + /* + * Ignore ENOTSUP, but don't try again. + */ + if (ret != ENOTSUP) + return (ret); + block->os_cache_dirty_max = 0; + } } -#endif + + /* Optionally discard blocks from the buffer cache. */ + WT_RET(__wt_block_discard(session, block, align_size)); + WT_STAT_FAST_CONN_INCR(session, block_write); WT_STAT_FAST_CONN_INCRV(session, block_byte_write, align_size); diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index 7c7f8cab855..8ce1463a0db 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -19,7 +19,7 @@ typedef struct { * When using the standard event handlers, the debugging output has to * do its own message handling because its output isn't line-oriented. */ - FILE *fp; /* Output file stream */ + WT_FH *fh; /* Output file stream */ WT_ITEM *msg; /* Buffered message */ WT_ITEM *tmp; /* Temporary space */ @@ -97,11 +97,8 @@ __debug_config(WT_SESSION_IMPL *session, WT_DBG *ds, const char *ofile) if (ofile == NULL) return (__wt_scr_alloc(session, 512, &ds->msg)); - /* If we're using a file, flush on each line. */ - WT_RET(__wt_fopen(session, ofile, WT_FHANDLE_WRITE, 0, &ds->fp)); - - (void)setvbuf(ds->fp, NULL, _IOLBF, 0); - return (0); + return (__wt_open(session, ofile, WT_FILE_TYPE_REGULAR, + WT_OPEN_CREATE | WT_STREAM_LINE_BUFFER | WT_STREAM_WRITE, &ds->fh)); } /* @@ -130,7 +127,7 @@ __dmsg_wrapup(WT_DBG *ds) } /* Close any file we opened. */ - (void)__wt_fclose(&ds->fp, WT_FHANDLE_WRITE); + (void)__wt_close(session, &ds->fh); } /* @@ -155,7 +152,7 @@ __dmsg(WT_DBG *ds, const char *fmt, ...) 
* the output chunk, and pass it to the event handler once we see a * terminating newline. */ - if (ds->fp == NULL) { + if (ds->fh == NULL) { msg = ds->msg; for (;;) { p = (char *)msg->mem + msg->size; @@ -187,7 +184,7 @@ __dmsg(WT_DBG *ds, const char *fmt, ...) } } else { va_start(ap, fmt); - (void)__wt_vfprintf(ds->fp, fmt, ap); + (void)__wt_vfprintf(session, ds->fh, fmt, ap); va_end(ap); } } @@ -204,7 +201,7 @@ __wt_debug_addr_print( WT_DECL_RET; WT_RET(__wt_scr_alloc(session, 128, &buf)); - ret = __wt_fprintf(stderr, + ret = __wt_fprintf(session, WT_STDERR(session), "%s\n", __wt_addr_string(session, addr, addr_size, buf)); __wt_scr_free(session, &buf); diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c index 1f739c9572e..1181d92609f 100644 --- a/src/third_party/wiredtiger/src/btree/bt_discard.c +++ b/src/third_party/wiredtiger/src/btree/bt_discard.c @@ -40,6 +40,7 @@ __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref) void __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) { + WT_FH *fh; WT_PAGE *page; WT_PAGE_HEADER *dsk; WT_PAGE_MODIFY *mod; @@ -133,8 +134,10 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) dsk = (WT_PAGE_HEADER *)page->dsk; if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC)) __wt_overwrite_and_free_len(session, dsk, dsk->mem_size); - if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED)) - (void)__wt_mmap_discard(session, dsk, dsk->mem_size); + if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED)) { + fh = S2BT(session)->bm->block->fh; + (void)fh->fh_map_discard(session, fh, dsk, dsk->mem_size); + } __wt_overwrite_and_free(session, page); } diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index 1d33a7e7c9a..02eea9c2f0c 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -36,8 +36,8 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) btree 
= S2BT(session); /* Checkpoint files are readonly. */ - readonly = (dhandle->checkpoint != NULL || - F_ISSET(S2C(session), WT_CONN_READONLY)); + readonly = dhandle->checkpoint != NULL || + F_ISSET(S2C(session), WT_CONN_READONLY); /* Get the checkpoint information for this name/checkpoint pair. */ WT_CLEAR(ckpt); diff --git a/src/third_party/wiredtiger/src/btree/bt_huffman.c b/src/third_party/wiredtiger/src/btree/bt_huffman.c index 2c0238545fb..a1aaf2c7ea0 100644 --- a/src/third_party/wiredtiger/src/btree/bt_huffman.c +++ b/src/third_party/wiredtiger/src/btree/bt_huffman.c @@ -134,9 +134,9 @@ static int __wt_huffman_read(WT_SESSION_IMPL *, */ static int __huffman_confchk_file( - WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v, bool *is_utf8p, FILE **fpp) + WT_SESSION_IMPL *session, WT_CONFIG_ITEM *v, bool *is_utf8p, WT_FH **fhp) { - FILE *fp; + WT_FH *fh; WT_DECL_RET; size_t len; char *fname; @@ -157,14 +157,14 @@ __huffman_confchk_file( /* Check the file exists. */ WT_RET(__wt_strndup(session, v->str + len, v->len - len, &fname)); - WT_ERR(__wt_fopen(session, - fname, WT_FHANDLE_READ, WT_FOPEN_FIXED, &fp)); + WT_ERR(__wt_open(session, fname, WT_FILE_TYPE_REGULAR, + WT_OPEN_FIXED | WT_OPEN_READONLY | WT_STREAM_READ, &fh)); /* Optionally return the file handle. */ - if (fpp == NULL) - (void)__wt_fclose(&fp, WT_FHANDLE_READ); + if (fhp == NULL) + (void)__wt_close(session, &fh); else - *fpp = fp; + *fhp = fh; err: __wt_free(session, fname); @@ -298,22 +298,24 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, struct __wt_huffman_table **tablep, u_int *entriesp, u_int *numbytesp) { struct __wt_huffman_table *table, *tp; - FILE *fp; + WT_DECL_ITEM(tmp); WT_DECL_RET; + WT_FH *fh; int64_t symbol, frequency; u_int entries, lineno; + int n; bool is_utf8; *tablep = NULL; *entriesp = *numbytesp = 0; - fp = NULL; + fh = NULL; table = NULL; /* * Try and open the backing file. 
*/ - WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fp)); + WT_RET(__huffman_confchk_file(session, ip, &is_utf8, &fh)); /* * UTF-8 table is 256 bytes, with a range of 0-255. @@ -329,9 +331,13 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, WT_ERR(__wt_calloc_def(session, entries, &table)); } - for (tp = table, lineno = 1; (ret = - fscanf(fp, "%" SCNi64 " %" SCNi64, &symbol, &frequency)) != EOF; - ++tp, ++lineno) { + WT_ERR(__wt_scr_alloc(session, 0, &tmp)); + for (tp = table, lineno = 1;; ++tp, ++lineno) { + WT_ERR(__wt_getline(session, tmp, fh)); + if (tmp->size == 0) + break; + n = sscanf( + tmp->data, "%" SCNi64 " %" SCNi64, &symbol, &frequency); /* * Entries is 0-based, that is, there are (entries +1) possible * values that can be configured. The line number is 1-based, so @@ -343,7 +349,7 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, "Huffman table file %.*s is corrupted, " "more than %" PRIu32 " entries", (int)ip->len, ip->str, entries + 1); - if (ret != 2) + if (n != 2) WT_ERR_MSG(session, EINVAL, "line %u of Huffman table file %.*s is corrupted: " "expected two unsigned integral values", @@ -365,7 +371,6 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, tp->symbol = (uint32_t)symbol; tp->frequency = (uint32_t)frequency; } - ret = ferror(fp) ? WT_ERROR : 0; *entriesp = lineno - 1; *tablep = table; @@ -373,7 +378,9 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, if (0) { err: __wt_free(session, table); } - (void)__wt_fclose(&fp, WT_FHANDLE_READ); + (void)__wt_close(session, &fh); + + __wt_scr_free(session, &tmp); return (ret); } diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c index 57056eb5c99..826589f8bdd 100644 --- a/src/third_party/wiredtiger/src/btree/bt_sync.c +++ b/src/third_party/wiredtiger/src/btree/bt_sync.c @@ -259,7 +259,7 @@ err: /* On error, clear any left-over tree walk. 
*/ */ if (ret == 0 && syncop == WT_SYNC_WRITE_LEAVES && F_ISSET(conn, WT_CONN_CKPT_SYNC)) - WT_RET(btree->bm->sync(btree->bm, session, true)); + WT_RET(btree->bm->sync(btree->bm, session, false)); return (ret); } diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c index 952298f2456..83dc7924312 100644 --- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c +++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c @@ -84,7 +84,7 @@ __verify_config_offsets( WT_CONFIG list; WT_CONFIG_ITEM cval, k, v; WT_DECL_RET; - u_long offset; + uint64_t offset; *quitp = false; @@ -97,7 +97,7 @@ __verify_config_offsets( * verify because that's where we "dump blocks" for debugging.) */ *quitp = true; - if (v.len != 0 || sscanf(k.str, "%lu", &offset) != 1) + if (v.len != 0 || sscanf(k.str, "%" SCNu64, &offset) != 1) WT_RET_MSG(session, EINVAL, "unexpected dump offset format"); #if !defined(HAVE_DIAGNOSTIC) diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c index 8796ec6b2fc..fd541458fa8 100644 --- a/src/third_party/wiredtiger/src/cache/cache_las.c +++ b/src/third_party/wiredtiger/src/cache/cache_las.c @@ -58,8 +58,10 @@ __wt_las_create(WT_SESSION_IMPL *session) conn = S2C(session); - if (F_ISSET(conn, WT_CONN_READONLY)) + /* Read-only and in-memory configurations don't need the LAS table. 
*/ + if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY)) return (0); + /* * Done at startup: we cannot do it on demand because we require the * schema lock to create and drop the table, and it may not always be diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index c752e5eb265..5b6f0bac323 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -150,9 +150,9 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = { confchk_wiredtiger_open_statistics_log_subconfigs, 6 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," + "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," + "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," + "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," "\"shared_cache\",\"split\",\"temporary\",\"transaction\"," "\"verify\",\"version\",\"write\"]", NULL, 0 }, @@ -680,9 +680,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { { "use_environment_priv", "boolean", NULL, NULL, NULL, 0 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," + "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," + "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," + "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," "\"shared_cache\",\"split\",\"temporary\",\"transaction\"," "\"verify\",\"version\",\"write\"]", NULL, 0 }, @@ -761,9 +761,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { { 
"use_environment_priv", "boolean", NULL, NULL, NULL, 0 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," + "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," + "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," + "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," "\"shared_cache\",\"split\",\"temporary\",\"transaction\"," "\"verify\",\"version\",\"write\"]", NULL, 0 }, @@ -837,9 +837,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { confchk_wiredtiger_open_transaction_sync_subconfigs, 2 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," + "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," + "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," + "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," "\"shared_cache\",\"split\",\"temporary\",\"transaction\"," "\"verify\",\"version\",\"write\"]", NULL, 0 }, @@ -913,9 +913,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { confchk_wiredtiger_open_transaction_sync_subconfigs, 2 }, { "verbose", "list", NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\"," - "\"evict\",\"evictserver\",\"fileops\",\"log\",\"lsm\"," - "\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\"," - "\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," + "\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\"," + "\"lsm\",\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\"," + "\"read\",\"rebalance\",\"reconcile\",\"recovery\",\"salvage\"," 
"\"shared_cache\",\"split\",\"temporary\",\"transaction\"," "\"verify\",\"version\",\"write\"]", NULL, 0 }, diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index 6d115c8fdcd..9e2f03da21f 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -1118,7 +1118,8 @@ __conn_config_append(const char *cfg[], const char *config) { while (*cfg != NULL) ++cfg; - *cfg = config; + cfg[0] = config; + cfg[1] = NULL; } /* @@ -1196,7 +1197,8 @@ __conn_config_file(WT_SESSION_IMPL *session, return (0); /* Open the configuration file. */ - WT_RET(__wt_open(session, filename, false, false, 0, &fh)); + WT_RET(__wt_open( + session, filename, WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY, &fh)); WT_ERR(__wt_filesize(session, fh, &size)); if (size == 0) goto err; @@ -1488,8 +1490,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) exist = false; if (!is_create) WT_ERR(__wt_exist(session, WT_WIREDTIGER, &exist)); - ret = __wt_open(session, - WT_SINGLETHREAD, is_create || exist, false, 0, &conn->lock_fh); + ret = __wt_open(session, WT_SINGLETHREAD, WT_FILE_TYPE_REGULAR, + is_create || exist ? WT_OPEN_CREATE : 0, &conn->lock_fh); /* * If this is a read-only connection and we cannot grab the lock @@ -1517,7 +1519,7 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) * zero-length, and that's OK, the underlying call supports * locking past the end-of-file. */ - if (__wt_bytelock(conn->lock_fh, (wt_off_t)0, true) != 0) + if (__wt_file_lock(session, conn->lock_fh, true) != 0) WT_ERR_MSG(session, EBUSY, "WiredTiger database is already being managed by " "another process"); @@ -1543,7 +1545,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) } /* We own the lock file, optionally create the WiredTiger file. */ - ret = __wt_open(session, WT_WIREDTIGER, is_create, false, 0, &fh); + ret = __wt_open(session, WT_WIREDTIGER, + WT_FILE_TYPE_REGULAR, is_create ? 
WT_OPEN_CREATE : 0, &fh); /* * If we're read-only, check for success as well as handled errors. @@ -1564,12 +1567,12 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) * as described above). Immediately release the lock, it's * just a test. */ - if (__wt_bytelock(fh, (wt_off_t)0, true) != 0) { + if (__wt_file_lock(session, fh, true) != 0) { WT_ERR_MSG(session, EBUSY, "WiredTiger database is already being managed by " "another process"); } - WT_ERR(__wt_bytelock(fh, (wt_off_t)0, false)); + WT_ERR(__wt_file_lock(session, fh, false)); } /* @@ -1590,7 +1593,7 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[]) len = (size_t)snprintf(buf, sizeof(buf), "%s\n%s\n", WT_WIREDTIGER, WIREDTIGER_VERSION_STRING); WT_ERR(__wt_write(session, fh, (wt_off_t)0, len, buf)); - WT_ERR(__wt_fsync(session, fh)); + WT_ERR(__wt_fsync(session, fh, true)); } else { /* * Although exclusive and the read-only configuration settings @@ -1692,6 +1695,7 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) { "evict", WT_VERB_EVICT }, { "evictserver", WT_VERB_EVICTSERVER }, { "fileops", WT_VERB_FILEOPS }, + { "handleops", WT_VERB_HANDLEOPS }, { "log", WT_VERB_LOG }, { "lsm", WT_VERB_LSM }, { "lsm_manager", WT_VERB_LSM_MANAGER }, @@ -1750,14 +1754,14 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) static int __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) { - FILE *fp; + WT_FH *fh; WT_CONFIG parser; WT_CONFIG_ITEM cval, k, v; WT_DECL_RET; bool exist; const char *base_config; - fp = NULL; + fh = NULL; base_config = NULL; /* @@ -1789,10 +1793,11 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) if (exist) return (0); - WT_RET(__wt_fopen(session, - WT_BASECONFIG_SET, WT_FHANDLE_WRITE, 0, &fp)); + WT_RET(__wt_open(session, + WT_BASECONFIG_SET, WT_FILE_TYPE_REGULAR, + WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE | WT_STREAM_WRITE, &fh)); - WT_ERR(__wt_fprintf(fp, "%s\n\n", + WT_ERR(__wt_fprintf(session, fh, 
"%s\n\n", "# Do not modify this file.\n" "#\n" "# WiredTiger created this file when the database was created,\n" @@ -1839,18 +1844,18 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[]) --v.str; v.len += 2; } - WT_ERR(__wt_fprintf(fp, + WT_ERR(__wt_fprintf(session, fh, "%.*s=%.*s\n", (int)k.len, k.str, (int)v.len, v.str)); } WT_ERR_NOTFOUND_OK(ret); /* Flush the handle and rename the file into place. */ - ret = __wt_sync_fp_and_rename( - session, &fp, WT_BASECONFIG_SET, WT_BASECONFIG); + ret = __wt_sync_handle_and_rename( + session, &fh, WT_BASECONFIG_SET, WT_BASECONFIG); if (0) { /* Close open file handle, remove any temporary file. */ -err: WT_TRET(__wt_fclose(&fp, WT_FHANDLE_WRITE)); +err: WT_TRET(__wt_close(session, &fh)); WT_TRET(__wt_remove_if_exists(session, WT_BASECONFIG_SET)); } @@ -1932,44 +1937,71 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, session = conn->default_session = &conn->dummy_session; session->iface.connection = &conn->iface; session->name = "wiredtiger_open"; - __wt_random_init(&session->rnd); + + /* Do standard I/O and error handling first. */ + WT_ERR(__wt_os_stdio(session)); __wt_event_handler_set(session, event_handler); - /* Remaining basic initialization of the connection structure. */ + /* Basic initialization of the connection structure. */ WT_ERR(__wt_connection_init(conn)); - /* Check/set the application-specified configuration string. */ + /* Check the application-specified configuration string. */ WT_ERR(__wt_config_check(session, WT_CONFIG_REF(session, wiredtiger_open), config, 0)); + + /* + * Build the temporary, initial configuration stack, in the following + * order (where later entries override earlier entries): + * + * 1. the base configuration for the wiredtiger_open call + * 2. the config passed in by the application + * 3. 
environment variable settings (optional) + * + * In other words, a configuration stack based on the application's + * passed-in information and nothing else. + */ cfg[0] = WT_CONFIG_BASE(session, wiredtiger_open); cfg[1] = config; - - /* Capture the config_base setting file for later use. */ - WT_ERR(__wt_config_gets(session, cfg, "config_base", &cval)); - config_base_set = cval.val != 0; - - /* Configure error messages so we get them right early. */ - WT_ERR(__wt_config_gets(session, cfg, "error_prefix", &cval)); - if (cval.len != 0) - WT_ERR(__wt_strndup( - session, cval.str, cval.len, &conn->error_prefix)); + WT_ERR(__wt_scr_alloc(session, 0, &i1)); + WT_ERR(__conn_config_env(session, cfg, i1)); /* - * We need to look for read-only early so that we can use it - * in __conn_single and whether to use the base config file. - * XXX that means we can only make the choice in __conn_single if the - * user passes it in via the config string to wiredtiger_open. + * We need to know if configured for read-only or in-memory behavior + * before reading/writing the filesystem. The only way the application + * can configure that before we touch the filesystem is the wiredtiger + * config string or the WIREDTIGER_CONFIG environment variable. + * + * The environment isn't trusted by default, for security reasons; if + * the application wants us to trust the environment before reading + * the filesystem, the wiredtiger_open config string is the only way. */ + WT_ERR(__wt_config_gets(session, cfg, "in_memory", &cval)); + if (cval.val != 0) + F_SET(conn, WT_CONN_IN_MEMORY); WT_ERR(__wt_config_gets(session, cfg, "readonly", &cval)); if (cval.val) F_SET(conn, WT_CONN_READONLY); /* - * XXX ideally, we would check "in_memory" here, so we could completely - * avoid having a database directory. However, it can be convenient to - * pass "in_memory" via the WIREDTIGER_CONFIG environment variable, and - * we haven't read it yet. 
+ * After checking readonly and in-memory, but before we do anything that + * touches the filesystem, configure the OS layer. */ + WT_ERR(__wt_os_init(session)); + + /* + * Capture the config_base setting file for later use. Again, if the + * application doesn't want us to read the base configuration file, + * the WIREDTIGER_CONFIG environment variable or the wiredtiger_open + * config string are the only ways. + */ + WT_ERR(__wt_config_gets(session, cfg, "config_base", &cval)); + config_base_set = cval.val != 0; + + /* Configure error messages so we get them right early. */ + WT_ERR(__wt_config_gets(session, cfg, "error_prefix", &cval)); + if (cval.len != 0) + WT_ERR(__wt_strndup( + session, cval.str, cval.len, &conn->error_prefix)); /* Get the database home. */ WT_ERR(__conn_home(session, home, cfg)); @@ -1978,8 +2010,8 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR(__conn_single(session, cfg)); /* - * Build the configuration stack, in the following order (where later - * entries override earlier entries): + * Build the real configuration stack, in the following order (where + * later entries override earlier entries): * * 1. all possible wiredtiger_open configurations * 2. the WiredTiger compilation version (expected to be overridden by @@ -1993,7 +2025,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, * Clear the entries we added to the stack, we're going to build it in * order. */ - WT_ERR(__wt_scr_alloc(session, 0, &i1)); WT_ERR(__wt_scr_alloc(session, 0, &i2)); WT_ERR(__wt_scr_alloc(session, 0, &i3)); cfg[0] = WT_CONFIG_BASE(session, wiredtiger_open_all); @@ -2016,11 +2047,15 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, * Merge the full configuration stack and save it for reconfiguration. */ WT_ERR(__wt_config_merge(session, cfg, NULL, &merge_cfg)); + /* - * The read-only setting may have been set in a configuration file. 
- * Get it again so that we can override other configuration settings - * before they are processed by the subsystems. + * Read-only and in-memory settings may have been set in a configuration + * file (not optimal, but we can handle it). Get those settings again so + * we can override other configuration settings as they are processed. */ + WT_ERR(__wt_config_gets(session, cfg, "in_memory", &cval)); + if (cval.val != 0) + F_SET(conn, WT_CONN_IN_MEMORY); WT_ERR(__wt_config_gets(session, cfg, "readonly", &cval)); if (cval.val) F_SET(conn, WT_CONN_READONLY); @@ -2054,6 +2089,8 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, * The error message configuration might have changed (if set in a * configuration file, and not in the application's configuration * string), get it again. Do it first, make error messages correct. + * Ditto verbose configuration so we dump everything the application + * wants to see. */ WT_ERR(__wt_config_gets(session, cfg, "error_prefix", &cval)); if (cval.len != 0) { @@ -2061,6 +2098,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR(__wt_strndup( session, cval.str, cval.len, &conn->error_prefix)); } + WT_ERR(__wt_verbose_config(session, cfg)); WT_ERR(__wt_config_gets(session, cfg, "hazard_max", &cval)); conn->hazard_max = (uint32_t)cval.val; @@ -2071,10 +2109,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR(__wt_config_gets(session, cfg, "session_scratch_max", &cval)); conn->session_scratch_max = (size_t)cval.val; - WT_ERR(__wt_config_gets(session, cfg, "in_memory", &cval)); - if (cval.val != 0) - F_SET(conn, WT_CONN_IN_MEMORY); - WT_ERR(__wt_config_gets(session, cfg, "checkpoint_sync", &cval)); if (cval.val) F_SET(conn, WT_CONN_CKPT_SYNC); @@ -2137,7 +2171,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR(__conn_statistics_config(session, cfg)); WT_ERR(__wt_lsm_manager_config(session, cfg)); WT_ERR(__wt_sweep_config(session, cfg)); - 
WT_ERR(__wt_verbose_config(session, cfg)); /* Initialize the OS page size for mmap */ conn->page_size = __wt_get_vm_pagesize(); diff --git a/src/third_party/wiredtiger/src/conn/conn_handle.c b/src/third_party/wiredtiger/src/conn/conn_handle.c index 16717597f4d..5f4c38e7361 100644 --- a/src/third_party/wiredtiger/src/conn/conn_handle.c +++ b/src/third_party/wiredtiger/src/conn/conn_handle.c @@ -41,6 +41,9 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) TAILQ_INIT(&conn->lsm_manager.appqh); TAILQ_INIT(&conn->lsm_manager.managerqh); + /* Random numbers. */ + __wt_random_init(&session->rnd); + /* Configuration. */ WT_RET(__wt_conn_config_init(session)); @@ -119,14 +122,6 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) session = conn->default_session; - /* - * Close remaining open files (before discarding the mutex, the - * underlying file-close code uses the mutex to guard lists of - * open files. - */ - if (conn->lock_fh) - WT_TRET(__wt_close(session, &conn->lock_fh)); - /* Remove from the list of connections. */ __wt_spin_lock(session, &__wt_process.spinlock); TAILQ_REMOVE(&__wt_process.connqh, conn, q); @@ -160,6 +155,9 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_free(session, conn->error_prefix); __wt_free(session, conn->sessions); + /* Destroy the OS configuration. */ + WT_TRET(__wt_os_cleanup(session)); + __wt_free(NULL, conn); return (ret); } diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c index 757d69bf240..6cb8ba3d0f9 100644 --- a/src/third_party/wiredtiger/src/conn/conn_log.c +++ b/src/third_party/wiredtiger/src/conn/conn_log.c @@ -260,8 +260,7 @@ __log_prealloc_once(WT_SESSION_IMPL *session) * files that may not have been used yet. 
*/ WT_ERR(__wt_dirlist(session, conn->log_path, - WT_LOG_PREPNAME, WT_DIRLIST_INCLUDE, - &recfiles, &reccount)); + WT_LOG_PREPNAME, WT_DIRLIST_INCLUDE, &recfiles, &reccount)); __wt_log_files_free(session, recfiles, reccount); recfiles = NULL; /* @@ -399,7 +398,7 @@ __log_file_server(void *arg) * to move the sync_lsn into the next file for * later syncs. */ - WT_ERR(__wt_fsync(session, close_fh)); + WT_ERR(__wt_fsync(session, close_fh, true)); /* * We want to make sure the file size reflects * actual data and has minimal pre-allocated @@ -451,7 +450,7 @@ __log_file_server(void *arg) log->bg_sync_lsn.l.file) || (log->sync_lsn.l.file < min_lsn.l.file)) continue; - WT_ERR(__wt_fsync(session, log->log_fh)); + WT_ERR(__wt_fsync(session, log->log_fh, true)); __wt_spin_lock(session, &log->log_sync_lock); locked = true; /* diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c index aff422654d7..38c3288209e 100644 --- a/src/third_party/wiredtiger/src/conn/conn_open.c +++ b/src/third_party/wiredtiger/src/conn/conn_open.c @@ -76,7 +76,6 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) WT_CONNECTION *wt_conn; WT_DECL_RET; WT_DLH *dlh; - WT_FH *fh; WT_SESSION_IMPL *s, *session; WT_TXN_GLOBAL *txn_global; u_int i; @@ -150,20 +149,6 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) WT_TRET(__wt_conn_remove_encryptor(session)); WT_TRET(__wt_conn_remove_extractor(session)); - /* - * Complain if files weren't closed, ignoring the lock file, we'll - * close it in a minute. - */ - TAILQ_FOREACH(fh, &conn->fhqh, q) { - if (fh == conn->lock_fh) - continue; - - __wt_errx(session, - "Connection has open file handles: %s", fh->name); - WT_TRET(__wt_close(session, &fh)); - fh = TAILQ_FIRST(&conn->fhqh); - } - /* Disconnect from shared cache - must be before cache destroy. 
*/ WT_TRET(__wt_conn_cache_pool_destroy(session)); @@ -182,6 +167,13 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) WT_TRET(__wt_dlclose(session, dlh)); } + /* Close the lock file, opening up the database to other connections. */ + if (conn->lock_fh != NULL) + WT_TRET(__wt_close(session, &conn->lock_fh)); + + /* Close any file handles left open. */ + WT_TRET(__wt_close_connection_close(session)); + /* * Close the internal (default) session, and switch back to the dummy * session in case of any error messages from the remaining operations diff --git a/src/third_party/wiredtiger/src/conn/conn_stat.c b/src/third_party/wiredtiger/src/conn/conn_stat.c index d6e59a50da5..fccc4786402 100644 --- a/src/third_party/wiredtiger/src/conn/conn_stat.c +++ b/src/third_party/wiredtiger/src/conn/conn_stat.c @@ -209,10 +209,11 @@ __statlog_dump(WT_SESSION_IMPL *session, const char *name, bool conn_stats) } if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON)) { - WT_ERR(__wt_fprintf(conn->stat_fp, + WT_ERR(__wt_fprintf(session, conn->stat_fh, "{\"version\":\"%s\",\"localTime\":\"%s\"", WIREDTIGER_VERSION_STRING, conn->stat_stamp)); - WT_ERR(__wt_fprintf(conn->stat_fp, ",\"wiredTiger\":{")); + WT_ERR(__wt_fprintf( + session, conn->stat_fh, ",\"wiredTiger\":{")); while ((ret = cursor->next(cursor)) == 0) { WT_ERR(cursor->get_value(cursor, &desc, &valstr, &val)); /* Check if we are starting a new section. */ @@ -224,23 +225,23 @@ __statlog_dump(WT_SESSION_IMPL *session, const char *name, bool conn_stats) strncmp(desc, tmp->data, tmp->size) != 0) { WT_ERR(__wt_buf_set( session, tmp, desc, prefixlen)); - WT_ERR(__wt_fprintf(conn->stat_fp, + WT_ERR(__wt_fprintf(session, conn->stat_fh, "%s\"%.*s\":{", first ? "" : "},", (int)prefixlen, desc)); first = false; groupfirst = true; } - WT_ERR(__wt_fprintf(conn->stat_fp, + WT_ERR(__wt_fprintf(session, conn->stat_fh, "%s\"%s\":%" PRId64, groupfirst ? 
"" : ",", endprefix + 2, val)); groupfirst = false; } WT_ERR_NOTFOUND_OK(ret); - WT_ERR(__wt_fprintf(conn->stat_fp, "}}}\n")); + WT_ERR(__wt_fprintf(session, conn->stat_fh, "}}}\n")); } else { while ((ret = cursor->next(cursor)) == 0) { WT_ERR(cursor->get_value(cursor, &desc, &valstr, &val)); - WT_ERR(__wt_fprintf(conn->stat_fp, + WT_ERR(__wt_fprintf(session, conn->stat_fh, "%s %" PRId64 " %s %s\n", conn->stat_stamp, val, name, desc)); } @@ -349,11 +350,11 @@ err: if (locked) static int __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp) { - FILE *log_file; - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; struct timespec ts; struct tm *tm, _tm; + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_FH *log_file; conn = S2C(session); @@ -366,16 +367,18 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp) WT_RET_MSG(session, ENOMEM, "strftime path conversion"); /* If the path has changed, cycle the log file. */ - if ((log_file = conn->stat_fp) == NULL || + if ((log_file = conn->stat_fh) == NULL || path == NULL || strcmp(tmp->mem, path->mem) != 0) { - conn->stat_fp = NULL; - WT_RET(__wt_fclose(&log_file, WT_FHANDLE_APPEND)); + conn->stat_fh = NULL; + WT_RET(__wt_close(session, &log_file)); if (path != NULL) (void)strcpy(path->mem, tmp->mem); - WT_RET(__wt_fopen(session, - tmp->mem, WT_FHANDLE_APPEND, WT_FOPEN_FIXED, &log_file)); + WT_RET(__wt_open(session, tmp->mem, + WT_FILE_TYPE_REGULAR, + WT_OPEN_CREATE | WT_OPEN_FIXED | WT_STREAM_APPEND, + &log_file)); } - conn->stat_fp = log_file; + conn->stat_fh = log_file; /* Create the entry prefix for this time of day. */ if (strftime(tmp->mem, tmp->memsize, conn->stat_format, tm) == 0) @@ -408,7 +411,7 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp) WT_RET(__statlog_lsm_apply(session)); /* Flush. 
*/ - return (__wt_fflush(conn->stat_fp)); + return (__wt_fsync(session, conn->stat_fh, true)); } /* @@ -594,7 +597,7 @@ __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close) conn->stat_session = NULL; conn->stat_tid_set = false; conn->stat_format = NULL; - WT_TRET(__wt_fclose(&conn->stat_fp, WT_FHANDLE_APPEND)); + WT_TRET(__wt_close(session, &conn->stat_fh)); conn->stat_path = NULL; conn->stat_sources = NULL; conn->stat_stamp = NULL; diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c index cc0aa5a1322..5d24ea61607 100644 --- a/src/third_party/wiredtiger/src/conn/conn_sweep.c +++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c @@ -369,8 +369,9 @@ __wt_sweep_create(WT_SESSION_IMPL *session) * * Don't tap the sweep thread for eviction. */ - session_flags = WT_SESSION_CAN_WAIT | - WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION; + session_flags = WT_SESSION_CAN_WAIT | WT_SESSION_NO_EVICTION; + if (F_ISSET(conn, WT_CONN_LAS_OPEN)) + session_flags |= WT_SESSION_LOOKASIDE_CURSOR; WT_RET(__wt_open_internal_session( conn, "sweep-server", true, session_flags, &conn->sweep_session)); session = conn->sweep_session; diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c index 2fb0c464a76..5be9b311a79 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_backup.c +++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c @@ -251,7 +251,7 @@ __backup_start( * Close any hot backup file. * We're about to open the incremental backup file. */ - WT_TRET(__wt_fclose(&cb->bfp, WT_FHANDLE_WRITE)); + WT_TRET(__wt_close(session, &cb->bfh)); WT_ERR(__backup_file_create(session, cb, log_only)); WT_ERR(__backup_list_append( session, cb, WT_INCREMENTAL_BACKUP)); @@ -269,7 +269,7 @@ __backup_start( } err: /* Close the hot backup file. 
*/ - WT_TRET(__wt_fclose(&cb->bfp, WT_FHANDLE_WRITE)); + WT_TRET(__wt_close(session, &cb->bfh)); if (ret != 0) { WT_TRET(__backup_cleanup_handles(session, cb)); WT_TRET(__backup_stop(session)); @@ -411,9 +411,9 @@ static int __backup_file_create( WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb, bool incremental) { - return (__wt_fopen(session, + return (__wt_open(session, incremental ? WT_INCREMENTAL_BACKUP : WT_METADATA_BACKUP, - WT_FHANDLE_WRITE, 0, &cb->bfp)); + WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_STREAM_WRITE, &cb->bfh)); } /* @@ -440,6 +440,7 @@ __backup_list_uri_append( WT_SESSION_IMPL *session, const char *name, bool *skip) { WT_CURSOR_BACKUP *cb; + WT_DECL_RET; char *value; cb = session->bkp_cursor; @@ -472,8 +473,9 @@ __backup_list_uri_append( /* Add the metadata entry to the backup file. */ WT_RET(__wt_metadata_search(session, name, &value)); - WT_RET(__wt_fprintf(cb->bfp, "%s\n%s\n", name, value)); + ret = __wt_fprintf(session, cb->bfh, "%s\n%s\n", name, value); __wt_free(session, value); + WT_RET(ret); /* Add file type objects to the list of files to be copied. 
*/ if (WT_PREFIX_MATCH(name, "file:")) diff --git a/src/third_party/wiredtiger/src/docs/top/main.dox b/src/third_party/wiredtiger/src/docs/top/main.dox index 7e670541e7d..08f7536d540 100644 --- a/src/third_party/wiredtiger/src/docs/top/main.dox +++ b/src/third_party/wiredtiger/src/docs/top/main.dox @@ -6,12 +6,12 @@ WiredTiger is an high performance, scalable, production quality, NoSQL, @section releases Releases <table> +@row{<b>WiredTiger 2.8.0</b> (current), + <a href="releases/wiredtiger-2.8.0.tar.bz2"><b>[Release package]</b></a>, + <a href="2.8.0/index.html"><b>[Documentation]</b></a>} @row{<b>WiredTiger 2.7.0</b> (current), <a href="releases/wiredtiger-2.7.0.tar.bz2"><b>[Release package]</b></a>, <a href="2.7.0/index.html"><b>[Documentation]</b></a>} -@row{<b>WiredTiger 2.6.1</b> (previous), - <a href="releases/wiredtiger-2.6.1.tar.bz2"><b>[Release package]</b></a>, - <a href="2.6.1/index.html"><b>[Documentation]</b></a>} @row{<b>Development branch</b>, <a href="https://github.com/wiredtiger/wiredtiger"><b>[Source code]</b></a>, <a href="develop/index.html"><b>[Documentation]</b></a>} diff --git a/src/third_party/wiredtiger/src/docs/upgrading.dox b/src/third_party/wiredtiger/src/docs/upgrading.dox index 8b3d61e4c19..5e824fee977 100644 --- a/src/third_party/wiredtiger/src/docs/upgrading.dox +++ b/src/third_party/wiredtiger/src/docs/upgrading.dox @@ -1,6 +1,6 @@ /*! @page upgrading Upgrading WiredTiger applications -@section version_271 Upgrading to Version 2.7.1 +@section version_280 Upgrading to Version 2.8.0 <dl> <dt>LSM metadata</dt> <dd> @@ -14,7 +14,7 @@ format will be upgraded automatically, but once updated to the new version <dd> Historically, bulk-load of a column-store object ignored any key set in the cursor and automatically assigned each inserted row the next sequential -record number for its key. In the 2.7.1 release, column-store objects match +record number for its key. 
In the 2.8.0 release, column-store objects match row-store behavior and require the cursor key be set before an insert. (This allows sparse tables to be created in column-store objects, any skipped records are created as already-deleted rows.) To match the previous @@ -49,6 +49,13 @@ bulk-loaded, so they do not get named checkpoints that complete during the bulk load. </dd> +<dt>Remove WT_LSN structure from public API</dt> +<dd> +The WiredTiger public API used to define a structure that could encapsulate +log sequence numbers. That structure is no longer exposed publicly. +</dd> + +<dt> </dl><hr> @section version_270 Upgrading to Version 2.7.0 diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index 50a00787f35..d3e32d7fc23 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -1688,9 +1688,9 @@ __wt_evict_priority_clear(WT_SESSION_IMPL *session) int __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) { - FILE *fp; WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle, *saved_dhandle; + WT_FH *fh; WT_PAGE *page; WT_REF *next_walk; uint64_t dirty_bytes, dirty_pages, intl_bytes, intl_pages; @@ -1702,12 +1702,13 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) total_bytes = 0; if (ofile == NULL) - fp = stderr; + fh = WT_STDERR(session); else - WT_RET(__wt_fopen(session, ofile, WT_FHANDLE_WRITE, 0, &fp)); + WT_RET(__wt_open(session, ofile, WT_FILE_TYPE_REGULAR, + WT_OPEN_CREATE | WT_STREAM_WRITE, &fh)); /* Note: odd string concatenation avoids spelling errors. 
*/ - (void)__wt_fprintf(fp, "==========\n" "cache dump\n"); + (void)__wt_fprintf(session, fh, "==========\n" "cache dump\n"); saved_dhandle = session->dhandle; TAILQ_FOREACH(dhandle, &conn->dhqh, q) { @@ -1746,23 +1747,25 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) session->dhandle = NULL; if (dhandle->checkpoint == NULL) - (void)__wt_fprintf(fp, "%s(<live>): \n", dhandle->name); + (void)__wt_fprintf(session, fh, + "%s(<live>): \n", dhandle->name); else - (void)__wt_fprintf(fp, "%s(checkpoint=%s): \n", + (void)__wt_fprintf(session, fh, + "%s(checkpoint=%s): \n", dhandle->name, dhandle->checkpoint); if (intl_pages != 0) - (void)__wt_fprintf(fp, "\t" "internal pages: " - "%" PRIu64 " pages, %" PRIu64 + (void)__wt_fprintf(session, fh, + "\t" "internal pages: %" PRIu64 " pages, %" PRIu64 " max, %" PRIu64 "MB total\n", intl_pages, max_intl_bytes, intl_bytes >> 20); if (leaf_pages != 0) - (void)__wt_fprintf(fp, "\t" "leaf pages: " - "%" PRIu64 " pages, %" PRIu64 + (void)__wt_fprintf(session, fh, + "\t" "leaf pages: %" PRIu64 " pages, %" PRIu64 " max, %" PRIu64 "MB total\n", leaf_pages, max_leaf_bytes, leaf_bytes >> 20); if (dirty_pages != 0) - (void)__wt_fprintf(fp, "\t" "dirty pages: " - "%" PRIu64 " pages, %" PRIu64 + (void)__wt_fprintf(session, fh, + "\t" "dirty pages: %" PRIu64 " pages, %" PRIu64 " max, %" PRIu64 "MB total\n", dirty_pages, max_dirty_bytes, dirty_bytes >> 20); @@ -1777,12 +1780,13 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) if (conn->cache->overhead_pct != 0) total_bytes += (total_bytes * (uint64_t)conn->cache->overhead_pct) / 100; - (void)__wt_fprintf(fp, "cache dump: total found = %" PRIu64 "MB" - " vs tracked inuse %" PRIu64 "MB\n", + (void)__wt_fprintf(session, fh, + "cache dump: total found = %" PRIu64 + "MB vs tracked inuse %" PRIu64 "MB\n", total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20); - (void)__wt_fprintf(fp, "==========\n"); - if (fp != stderr) - WT_RET(__wt_fclose(&fp, 
WT_FHANDLE_WRITE)); + (void)__wt_fprintf(session, fh, "==========\n"); + if (ofile != NULL) + WT_RET(__wt_close(session, &fh)); return (0); } #endif diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h index 10efd35086c..e964fb4e8c2 100644 --- a/src/third_party/wiredtiger/src/include/block.h +++ b/src/third_party/wiredtiger/src/include/block.h @@ -217,10 +217,16 @@ struct __wt_block { /* A list of block manager handles, sharing a file descriptor. */ uint32_t ref; /* References */ - WT_FH *fh; /* Backing file handle */ TAILQ_ENTRY(__wt_block) q; /* Linked list of handles */ TAILQ_ENTRY(__wt_block) hashq; /* Hashed list of handles */ + WT_FH *fh; /* Backing file handle */ + wt_off_t size; /* File size */ + wt_off_t extend_size; /* File extended size */ + wt_off_t extend_len; /* File extend chunk size */ + bool nowait_sync_available; /* File can flush asynchronously */ + bool preload_available; /* File pages can be preloaded */ + /* Configuration information, set when the file is opened. */ uint32_t allocfirst; /* Allocation is first-fit */ uint32_t allocsize; /* Allocation size */ @@ -399,3 +405,15 @@ __wt_block_header_byteswap(WT_BLOCK_HEADER *blk) */ #define WT_BLOCK_COMPRESS_SKIP 64 #define WT_BLOCK_ENCRYPT_SKIP WT_BLOCK_HEADER_BYTE_SIZE + +/* + * __wt_block_header -- + * Return the size of the block-specific header. 
+ */ +static inline u_int +__wt_block_header(WT_BLOCK *block) +{ + WT_UNUSED(block); + + return ((u_int)WT_BLOCK_HEADER_SIZE); +} diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index 2255056fcf6..c2b1dd68c18 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -333,7 +333,7 @@ struct __wt_connection_impl { bool stat_tid_set; /* Statistics log thread set */ WT_CONDVAR *stat_cond; /* Statistics log wait mutex */ const char *stat_format; /* Statistics log timestamp format */ - FILE *stat_fp; /* Statistics log file handle */ + WT_FH *stat_fh; /* Statistics log file handle */ char *stat_path; /* Statistics log path format */ char **stat_sources; /* Statistics log list of objects */ const char *stat_stamp; /* Statistics log entry timestamp */ @@ -421,5 +421,25 @@ struct __wt_connection_impl { int page_size; /* OS page size for mmap alignment */ uint32_t verbose; + void *inmemory; /* In-memory configuration cookie */ + +#define WT_STDERR(s) (&S2C(s)->wt_stderr) +#define WT_STDOUT(s) (&S2C(s)->wt_stdout) + WT_FH wt_stderr, wt_stdout; + + /* + * OS library/system call jump table, to support in-memory and readonly + * configurations as well as special devices with other non-POSIX APIs. 
+ */ + int (*file_directory_list)(WT_SESSION_IMPL *, + const char *, const char *, uint32_t, char ***, u_int *); + int (*file_directory_sync)(WT_SESSION_IMPL *, const char *); + int (*file_exist)(WT_SESSION_IMPL *, const char *, bool *); + int (*file_remove)(WT_SESSION_IMPL *, const char *); + int (*file_rename)(WT_SESSION_IMPL *, const char *, const char *); + int (*file_size)(WT_SESSION_IMPL *, const char *, bool, wt_off_t *); + int (*handle_open)(WT_SESSION_IMPL *, + WT_FH *, const char *, uint32_t, uint32_t); + uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 4b35daf106e..1d2ce1bfd82 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -67,7 +67,7 @@ struct __wt_cursor_backup { WT_CURSOR iface; size_t next; /* Cursor position */ - FILE *bfp; /* Backup file */ + WT_FH *bfh; /* Backup file */ uint32_t maxid; /* Maximum log file ID seen */ WT_CURSOR_BACKUP_ENTRY *list; /* List of files to be copied. 
*/ diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 48c52d4a109..292bcfb1c7c 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -49,7 +49,7 @@ extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const char *file extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on); extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp); extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block); -extern int __wt_desc_init(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize); +extern int __wt_desc_write(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize); extern void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats); extern int __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep); extern int __wt_block_manager_named_size( WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep); @@ -73,8 +73,8 @@ extern int __wt_block_verify_end(WT_SESSION_IMPL *session, WT_BLOCK *block); extern int __wt_verify_ckpt_load( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_BLOCK_CKPT *ci); extern int __wt_verify_ckpt_unload(WT_SESSION_IMPL *session, WT_BLOCK *block); extern int __wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size); -extern u_int __wt_block_header(WT_BLOCK *block); -extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len); +extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len); +extern int __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size); extern int __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep); extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, 
WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum); extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, bool data_cksum, bool caller_locked); @@ -375,23 +375,23 @@ extern int __wt_logrec_read(WT_SESSION_IMPL *session, const uint8_t **pp, const extern int __wt_logop_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *optypep, uint32_t *opsizep); extern int __wt_logop_col_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno, WT_ITEM *value); extern int __wt_logop_col_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep); -extern int __wt_logop_col_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); +extern int __wt_logop_col_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags); extern int __wt_logop_col_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno); extern int __wt_logop_col_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop); -extern int __wt_logop_col_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); +extern int __wt_logop_col_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags); extern int __wt_logop_col_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t start, uint64_t stop); extern int __wt_logop_col_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *startp, uint64_t *stopp); -extern int __wt_logop_col_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); +extern int 
__wt_logop_col_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags); extern int __wt_logop_row_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key, WT_ITEM *value); extern int __wt_logop_row_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep); -extern int __wt_logop_row_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); +extern int __wt_logop_row_put_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags); extern int __wt_logop_row_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key); extern int __wt_logop_row_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp); -extern int __wt_logop_row_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); +extern int __wt_logop_row_remove_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags); extern int __wt_logop_row_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *start, WT_ITEM *stop, uint32_t mode); extern int __wt_logop_row_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep); -extern int __wt_logop_row_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); -extern int __wt_txn_op_printlog( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); +extern int __wt_logop_row_truncate_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags); +extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t flags); 
extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); extern int __wt_log_slot_switch( WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced); extern int __wt_log_slot_new(WT_SESSION_IMPL *session); @@ -484,74 +484,6 @@ extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session); extern int __wt_turtle_init(WT_SESSION_IMPL *session); extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep); extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value); -extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); -extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp); -extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp); -extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); -extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); -extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); -extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp); -extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg); -extern int __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp); -extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp); -extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret); -extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh); -extern int __wt_errno(void); -extern int __wt_map_error_rdonly(int error); -extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen); -extern int __wt_exist(WT_SESSION_IMPL *session, const char 
*filename, bool *existp); -extern void __wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh); -extern int __wt_fallocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len); -extern int __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep); -extern int __wt_filesize_name(WT_SESSION_IMPL *session, const char *filename, bool silent, wt_off_t *sizep); -extern int __wt_bytelock(WT_FH *fhp, wt_off_t byte, bool lock); -extern int __wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh); -extern int __wt_directory_sync(WT_SESSION_IMPL *session, const char *path); -extern int __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh); -extern int __wt_fsync_async(WT_SESSION_IMPL *session, WT_FH *fh); -extern int __wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len); -extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp); -extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, FILE *fp); -extern int __wt_getopt( const char *progname, int nargc, char *const *nargv, const char *ostr); -extern int __wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie); -extern int __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size); -extern int __wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size); -extern int __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie); -extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp); -extern int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled); -extern int __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); -extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); -extern int __wt_rwlock_alloc( WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp, const char *name); -extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK 
*rwlock); -extern int __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); -extern int __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); -extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); -extern int __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); -extern int __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); -extern int __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp); -extern int __wt_once(void (*init_routine)(void)); -extern int __wt_open(WT_SESSION_IMPL *session, const char *name, bool ok_create, bool exclusive, int dio_type, WT_FH **fhp); -extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp); -extern int __wt_get_vm_pagesize(void); -extern bool __wt_absolute_path(const char *path); -extern const char *__wt_path_separator(void); -extern bool __wt_has_priv(void); -extern int __wt_remove(WT_SESSION_IMPL *session, const char *name); -extern int __wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to); -extern int __wt_read( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf); -extern int __wt_write(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, const void *buf); -extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); -extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, WT_FHANDLE_MODE mode_flag, u_int flags, FILE **fpp); -extern int __wt_vfprintf(FILE *fp, const char *fmt, va_list ap); -extern int __wt_fprintf(FILE *fp, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3))); -extern int __wt_fflush(FILE *fp); -extern int __wt_fclose(FILE **fpp, WT_FHANDLE_MODE mode_flag); -extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base); -extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg); -extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid); -extern void __wt_thread_id(char *buf, size_t buflen); -extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp); -extern void __wt_yield(void); extern int __wt_ext_struct_pack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, void *buffer, size_t size, const char *fmt, ...); extern int __wt_ext_struct_size(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, size_t *sizep, const char *fmt, ...); extern int __wt_ext_struct_unpack(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const void *buffer, size_t size, const char *fmt, ...); @@ -672,12 +604,6 @@ extern int __wt_panic(WT_SESSION_IMPL *session); extern int __wt_illegal_value(WT_SESSION_IMPL *session, const char *name); extern int __wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri); extern int __wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri); -extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path); -extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path); -extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name); -extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to); -extern int __wt_fh_sync_and_rename( WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to); -extern int __wt_sync_fp_and_rename( WT_SESSION_IMPL *session, FILE **fpp, const char *from, const char *to); extern int __wt_library_init(void); extern int __wt_breakpoint(void); extern void __wt_attach(WT_SESSION_IMPL *session); @@ -703,6 
+629,14 @@ extern void __wt_huffman_close(WT_SESSION_IMPL *session, void *huffman_arg); extern int __wt_print_huffman_code(void *huffman_arg, uint16_t symbol); extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf); extern int __wt_huffman_decode(WT_SESSION_IMPL *session, void *huffman_arg, const uint8_t *from_arg, size_t from_len, WT_ITEM *to_buf); +extern int __wt_rwlock_alloc( WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp, const char *name); +extern int __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); +extern int __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); +extern int __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); +extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); +extern int __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); +extern int __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock); +extern int __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp); extern uint32_t __wt_nlpo2_round(uint32_t v); extern uint32_t __wt_nlpo2(uint32_t v); extern uint32_t __wt_log2_int(uint32_t n); @@ -771,10 +705,77 @@ extern int __wt_txn_checkpoint_logread( WT_SESSION_IMPL *session, const uint8_t extern int __wt_txn_checkpoint_log( WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp); extern int __wt_txn_truncate_log( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop); extern int __wt_txn_truncate_end(WT_SESSION_IMPL *session); -extern int __wt_txn_printlog(WT_SESSION *wt_session, FILE *out, uint32_t flags); +extern int __wt_txn_printlog(WT_SESSION *wt_session, uint32_t flags); extern int __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_named_snapshot_drop(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval); extern int 
__wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, const char *cfg[], bool *has_create, bool *has_drops); extern int __wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session); extern int __wt_txn_recover(WT_SESSION_IMPL *session); +extern bool __wt_absolute_path(const char *path); +extern bool __wt_handle_search(WT_SESSION_IMPL *session, const char *name, bool increment_ref, WT_FH *newfh, WT_FH **fhp); +extern bool __wt_has_priv(void); +extern const char *__wt_path_separator(void); +extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen); +extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp); +extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp); +extern int __wt_close_connection_close(WT_SESSION_IMPL *session); +extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, bool is_signalled, WT_CONDVAR **condp); +extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp); +extern int __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond); +extern int __wt_cond_wait_signal( WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool *signalled); +extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to); +extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh); +extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp); +extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret); +extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp); +extern int __wt_errno(void); +extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path); +extern int __wt_get_vm_pagesize(void); +extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp); +extern int __wt_getlasterror(void); +extern int __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh); +extern int __wt_getopt( const 
char *progname, int nargc, char *const *nargv, const char *ostr); +extern int __wt_malloc(WT_SESSION_IMPL *session, size_t bytes_to_allocate, void *retp); +extern int __wt_map_error_rdonly(int error); +extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path); +extern int __wt_once(void (*init_routine)(void)); +extern int __wt_open(WT_SESSION_IMPL *session, const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp); +extern int __wt_os_cleanup(WT_SESSION_IMPL *session); +extern int __wt_os_init(WT_SESSION_IMPL *session); +extern int __wt_os_inmemory(WT_SESSION_IMPL *session); +extern int __wt_os_inmemory_cleanup(WT_SESSION_IMPL *session); +extern int __wt_os_posix(WT_SESSION_IMPL *session); +extern int __wt_os_posix_cleanup(WT_SESSION_IMPL *session); +extern int __wt_os_stdio(WT_SESSION_IMPL *session); +extern int __wt_os_win(WT_SESSION_IMPL *session); +extern int __wt_os_win_cleanup(WT_SESSION_IMPL *session); +extern int __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp); +extern int __wt_posix_handle_allocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len); +extern int __wt_posix_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie); +extern int __wt_posix_map_discard( WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size); +extern int __wt_posix_map_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size); +extern int __wt_posix_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie); +extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); +extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); +extern int __wt_realloc_noclear(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, 
size_t bytes_to_allocate, void *retp); +extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name); +extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to); +extern int __wt_strndup(WT_SESSION_IMPL *session, const void *str, size_t len, void *retp); +extern int __wt_sync_handle_and_rename( WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to); +extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg); +extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid); +extern int __wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, const char *prefix, uint32_t flags, char ***dirlist, u_int *countp); +extern int __wt_win_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie); +extern int __wt_win_map_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size); +extern int __wt_win_map_preload( WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size); +extern int __wt_win_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie); +extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base); +extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); +extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg); +extern void __wt_posix_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh); +extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds); +extern void __wt_stream_set_line_buffer(FILE *fp); +extern void __wt_stream_set_no_buffer(FILE *fp); +extern void __wt_thread_id(char *buf, size_t buflen); +extern void __wt_yield(void); diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h index a6f42a9938f..3d9b0ed716b 100644 --- a/src/third_party/wiredtiger/src/include/flags.h +++ 
b/src/third_party/wiredtiger/src/include/flags.h @@ -28,7 +28,7 @@ #define WT_FILE_TYPE_DATA 0x00000002 #define WT_FILE_TYPE_DIRECTORY 0x00000004 #define WT_FILE_TYPE_LOG 0x00000008 -#define WT_FILE_TYPE_TURTLE 0x00000010 +#define WT_FILE_TYPE_REGULAR 0x00000010 #define WT_LOGSCAN_FIRST 0x00000001 #define WT_LOGSCAN_FROM_CKP 0x00000002 #define WT_LOGSCAN_ONE 0x00000004 @@ -83,24 +83,25 @@ #define WT_VERB_EVICT 0x00000010 #define WT_VERB_EVICTSERVER 0x00000020 #define WT_VERB_FILEOPS 0x00000040 -#define WT_VERB_LOG 0x00000080 -#define WT_VERB_LSM 0x00000100 -#define WT_VERB_LSM_MANAGER 0x00000200 -#define WT_VERB_METADATA 0x00000400 -#define WT_VERB_MUTEX 0x00000800 -#define WT_VERB_OVERFLOW 0x00001000 -#define WT_VERB_READ 0x00002000 -#define WT_VERB_REBALANCE 0x00004000 -#define WT_VERB_RECONCILE 0x00008000 -#define WT_VERB_RECOVERY 0x00010000 -#define WT_VERB_SALVAGE 0x00020000 -#define WT_VERB_SHARED_CACHE 0x00040000 -#define WT_VERB_SPLIT 0x00080000 -#define WT_VERB_TEMPORARY 0x00100000 -#define WT_VERB_TRANSACTION 0x00200000 -#define WT_VERB_VERIFY 0x00400000 -#define WT_VERB_VERSION 0x00800000 -#define WT_VERB_WRITE 0x01000000 +#define WT_VERB_HANDLEOPS 0x00000080 +#define WT_VERB_LOG 0x00000100 +#define WT_VERB_LSM 0x00000200 +#define WT_VERB_LSM_MANAGER 0x00000400 +#define WT_VERB_METADATA 0x00000800 +#define WT_VERB_MUTEX 0x00001000 +#define WT_VERB_OVERFLOW 0x00002000 +#define WT_VERB_READ 0x00004000 +#define WT_VERB_REBALANCE 0x00008000 +#define WT_VERB_RECONCILE 0x00010000 +#define WT_VERB_RECOVERY 0x00020000 +#define WT_VERB_SALVAGE 0x00040000 +#define WT_VERB_SHARED_CACHE 0x00080000 +#define WT_VERB_SPLIT 0x00100000 +#define WT_VERB_TEMPORARY 0x00200000 +#define WT_VERB_TRANSACTION 0x00400000 +#define WT_VERB_VERIFY 0x00800000 +#define WT_VERB_VERSION 0x01000000 +#define WT_VERB_WRITE 0x02000000 #define WT_VISIBILITY_ERR 0x00000010 /* * flags section: END diff --git a/src/third_party/wiredtiger/src/include/misc.i 
b/src/third_party/wiredtiger/src/include/misc.i index 04376441340..114b711ac88 100644 --- a/src/third_party/wiredtiger/src/include/misc.i +++ b/src/third_party/wiredtiger/src/include/misc.i @@ -65,8 +65,253 @@ __wt_verbose(WT_SESSION_IMPL *session, int flag, const char *fmt, ...) return (ret); #else WT_UNUSED(session); - WT_UNUSED(fmt); WT_UNUSED(flag); + WT_UNUSED(fmt); return (0); #endif } + +/* + * __wt_dirlist -- + * Get a list of files from a directory. + */ +static inline int +__wt_dirlist(WT_SESSION_IMPL *session, const char *dir, + const char *prefix, uint32_t flags, char ***dirlist, u_int *countp) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + + WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, + "%s: directory-list: %s prefix %s", + dir, LF_ISSET(WT_DIRLIST_INCLUDE) ? "include" : "exclude", + prefix == NULL ? "all" : prefix)); + + return (S2C(session)->file_directory_list( + session, dir, prefix, flags, dirlist, countp)); +} + +/* + * __wt_directory_sync -- + * Flush a directory to ensure file creation is durable. + */ +static inline int +__wt_directory_sync(WT_SESSION_IMPL *session, const char *name) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + WT_RET(__wt_verbose( + session, WT_VERB_FILEOPS, "%s: directory-sync", name)); + + return (S2C(session)->file_directory_sync(session, name)); +} + +/* + * __wt_exist -- + * Return if the file exists. + */ +static inline int +__wt_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +{ + WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-exist", name)); + + return (S2C(session)->file_exist(session, name, existp)); +} + +/* + * __wt_remove -- + * POSIX remove. 
+ */ +static inline int +__wt_remove(WT_SESSION_IMPL *session, const char *name) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-remove", name)); + + return (S2C(session)->file_remove(session, name)); +} + +/* + * __wt_rename -- + * POSIX rename. + */ +static inline int +__wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + WT_RET(__wt_verbose( + session, WT_VERB_FILEOPS, "%s to %s: file-rename", from, to)); + + return (S2C(session)->file_rename(session, from, to)); +} + +/* + * __wt_filesize_name -- + * Get the size of a file in bytes, by file name. + */ +static inline int +__wt_filesize_name( + WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +{ + WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: file-size", name)); + + return (S2C(session)->file_size(session, name, silent, sizep)); +} + +/* + * __wt_directory_sync_fh -- + * Flush a directory file handle to ensure file creation is durable. + * + * We don't use the normal sync path because many file systems don't require + * this step and we don't want to penalize them. + */ +static inline int +__wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + return (fh->fh_sync(session, fh, true)); +} + +/* + * __wt_fallocate -- + * Extend a file. + */ +static inline int +__wt_fallocate( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + + WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: handle-allocate: %" PRIuMAX " at %" PRIuMAX, + fh->name, (uintmax_t)len, (uintmax_t)offset)); + + return (fh->fh_allocate(session, fh, offset, len)); +} + +/* + * __wt_file_lock -- + * Lock/unlock a file. 
+ */ +static inline int +__wt_file_lock(WT_SESSION_IMPL * session, WT_FH *fh, bool lock) +{ + WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: handle-lock: %s", fh->name, lock ? "lock" : "unlock")); + + return (fh->fh_lock(session, fh, lock)); +} + +/* + * __wt_vfprintf -- + * ANSI C vfprintf. + */ +static inline int +__wt_vfprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) +{ + WT_RET(__wt_verbose( + session, WT_VERB_HANDLEOPS, "%s: handle-printf", fh->name)); + + return (fh->fh_printf(session, fh, fmt, ap)); +} + +/* + * __wt_fprintf -- + * ANSI C fprintf. + */ +static inline int +__wt_fprintf(WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, ...) + WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) +{ + WT_DECL_RET; + va_list ap; + + va_start(ap, fmt); + ret = __wt_vfprintf(session, fh, fmt, ap); + va_end(ap); + + return (ret); +} + +/* + * __wt_read -- + * POSIX pread. + */ +static inline int +__wt_read( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +{ + WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: handle-read: %" WT_SIZET_FMT " at %" PRIuMAX, + fh->name, len, (uintmax_t)offset)); + + WT_STAT_FAST_CONN_INCR(session, read_io); + + return (fh->fh_read(session, fh, offset, len, buf)); +} + +/* + * __wt_filesize -- + * Get the size of a file in bytes, by file handle. + */ +static inline int +__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +{ + WT_RET(__wt_verbose( + session, WT_VERB_HANDLEOPS, "%s: handle-size", fh->name)); + + return (fh->fh_size(session, fh, sizep)); +} + +/* + * __wt_fsync -- + * POSIX fflush/fsync. + */ +static inline int +__wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) +{ + WT_RET(__wt_verbose( + session, WT_VERB_HANDLEOPS, "%s: handle-sync", fh->name)); + + return (fh->fh_sync(session, fh, block)); +} + +/* + * __wt_ftruncate -- + * POSIX ftruncate. 
+ */ +static inline int +__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + + WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: handle-truncate: %" PRIuMAX, + fh->name, (uintmax_t)len)); + + return (fh->fh_truncate(session, fh, len)); +} + +/* + * __wt_write -- + * POSIX pwrite. + */ +static inline int +__wt_write(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) || + WT_STRING_MATCH(fh->name, + WT_SINGLETHREAD, strlen(WT_SINGLETHREAD))); + + WT_RET(__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: handle-write: %" WT_SIZET_FMT " at %" PRIuMAX, + fh->name, len, (uintmax_t)offset)); + + WT_STAT_FAST_CONN_INCR(session, write_io); + + return (fh->fh_write(session, fh, offset, len, buf)); +} diff --git a/src/third_party/wiredtiger/src/include/os.h b/src/third_party/wiredtiger/src/include/os.h index fbba7f05f88..2ff41d39f46 100644 --- a/src/third_party/wiredtiger/src/include/os.h +++ b/src/third_party/wiredtiger/src/include/os.h @@ -7,29 +7,6 @@ */ /* - * FILE handle close/open configuration. - */ -typedef enum { - WT_FHANDLE_APPEND, WT_FHANDLE_READ, WT_FHANDLE_WRITE -} WT_FHANDLE_MODE; - -#ifdef _WIN32 -/* - * Open in binary (untranslated) mode; translations involving carriage-return - * and linefeed characters are suppressed. - */ -#define WT_FOPEN_APPEND "ab" -#define WT_FOPEN_READ "rb" -#define WT_FOPEN_WRITE "wb" -#else -#define WT_FOPEN_APPEND "a" -#define WT_FOPEN_READ "r" -#define WT_FOPEN_WRITE "w" -#endif - -#define WT_FOPEN_FIXED 0x1 /* Path isn't relative to home */ - -/* * Number of directory entries can grow dynamically. */ #define WT_DIR_ENTRY 32 @@ -81,24 +58,52 @@ typedef enum { (t1).tv_nsec < (t2).tv_nsec ? -1 : \ (t1).tv_nsec == (t2).tv_nsec ? 
0 : 1 : 1) +/* + * The underlying OS calls return ENOTSUP if posix_fadvise functionality isn't + * available, but WiredTiger uses the POSIX flag names in the API. Use distinct + * values so the underlying code can distinguish. + */ +#ifndef POSIX_FADV_DONTNEED +#define POSIX_FADV_DONTNEED 0x01 +#endif +#ifndef POSIX_FADV_WILLNEED +#define POSIX_FADV_WILLNEED 0x02 +#endif + +#define WT_OPEN_CREATE 0x001 /* Create is OK */ +#define WT_OPEN_EXCLUSIVE 0x002 /* Exclusive open */ +#define WT_OPEN_FIXED 0x004 /* Path isn't relative to home */ +#define WT_OPEN_READONLY 0x008 /* Readonly open */ +#define WT_STREAM_APPEND 0x010 /* Open a stream: append */ +#define WT_STREAM_LINE_BUFFER 0x020 /* Line buffer the stream */ +#define WT_STREAM_READ 0x040 /* Open a stream: read */ +#define WT_STREAM_WRITE 0x080 /* Open a stream: write */ + struct __wt_fh { - char *name; /* File name */ + const char *name; /* File name */ uint64_t name_hash; /* Hash of name */ TAILQ_ENTRY(__wt_fh) q; /* List of open handles */ TAILQ_ENTRY(__wt_fh) hashq; /* Hashed list of handles */ u_int ref; /* Reference count */ -#ifndef _WIN32 - int fd; /* POSIX file handle */ -#else + /* + * Underlying file system handle support. + */ +#ifdef _WIN32 HANDLE filehandle; /* Windows file handle */ HANDLE filehandle_secondary; /* Windows file handle for file size changes */ +#else + int fd; /* POSIX file handle */ #endif - wt_off_t size; /* File size */ - wt_off_t extend_size; /* File extended size */ - wt_off_t extend_len; /* File extend chunk size */ + FILE *fp; /* ANSI C stdio handle */ + + /* + * Underlying in-memory handle support. 
+ */ + size_t off; /* Read/write offset */ + WT_ITEM buf; /* Data */ bool direct_io; /* O_DIRECT configured */ @@ -109,4 +114,26 @@ struct __wt_fh { WT_FALLOCATE_STD, WT_FALLOCATE_SYS } fallocate_available; bool fallocate_requires_locking; + +#define WT_FH_FLUSH_ON_CLOSE 0x01 /* Flush when closing */ +#define WT_FH_IN_MEMORY 0x02 /* In-memory, don't remove */ + uint32_t flags; + + int (*fh_advise)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, wt_off_t, int); + int (*fh_allocate)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, wt_off_t); + int (*fh_close)(WT_SESSION_IMPL *, WT_FH *); + int (*fh_getc)(WT_SESSION_IMPL *, WT_FH *, int *); + int (*fh_lock)(WT_SESSION_IMPL *, WT_FH *, bool); + int (*fh_map)(WT_SESSION_IMPL *, WT_FH *, void *, size_t *, void **); + int (*fh_map_discard)(WT_SESSION_IMPL *, WT_FH *, void *, size_t); + int (*fh_map_preload)(WT_SESSION_IMPL *, WT_FH *, const void *, size_t); + int (*fh_map_unmap)( + WT_SESSION_IMPL *, WT_FH *, void *, size_t, void **); + int (*fh_printf)(WT_SESSION_IMPL *, WT_FH *, const char *, va_list); + int (*fh_read)(WT_SESSION_IMPL *, WT_FH *, wt_off_t, size_t, void *); + int (*fh_size)(WT_SESSION_IMPL *, WT_FH *, wt_off_t *); + int (*fh_sync)(WT_SESSION_IMPL *, WT_FH *, bool); + int (*fh_truncate)(WT_SESSION_IMPL *, WT_FH *, wt_off_t); + int (*fh_write)( + WT_SESSION_IMPL *, WT_FH *, wt_off_t, size_t, const void *); }; diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index 1e263f22880..279858a808e 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -1922,8 +1922,8 @@ struct __wt_connection { * as a list\, such as <code>"verbose=[evictserver\,read]"</code>., a * list\, with values chosen from the following options: \c "api"\, \c * "block"\, \c "checkpoint"\, \c "compact"\, \c "evict"\, \c - * "evictserver"\, \c "fileops"\, \c "log"\, \c "lsm"\, \c - * "lsm_manager"\, \c "metadata"\, \c 
"mutex"\, \c "overflow"\, \c + * "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\, \c "lsm"\, + * \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c * "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c * "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default @@ -2417,10 +2417,10 @@ struct __wt_connection { * list\, such as <code>"verbose=[evictserver\,read]"</code>., a list\, with * values chosen from the following options: \c "api"\, \c "block"\, \c * "checkpoint"\, \c "compact"\, \c "evict"\, \c "evictserver"\, \c "fileops"\, - * \c "log"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c - * "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c - * "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, \c - * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default empty.} + * \c "handleops"\, \c "log"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c + * "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c + * "recovery"\, \c "salvage"\, \c "shared_cache"\, \c "split"\, \c "temporary"\, + * \c "transaction"\, \c "verify"\, \c "version"\, \c "write"; default empty.} * @config{write_through, Use \c FILE_FLAG_WRITE_THROUGH on Windows to write to * files. Ignored on non-Windows systems. Options are given as a list\, such * as <code>"write_through=[data]"</code>. 
Configuring \c write_through requires diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h index 4533c8cbca0..9e5007b38ed 100644 --- a/src/third_party/wiredtiger/src/include/wt_internal.h +++ b/src/third_party/wiredtiger/src/include/wt_internal.h @@ -349,15 +349,15 @@ union __wt_rand_state; #include "extern.h" #include "verify_build.h" -#include "buf.i" -#include "misc.i" #include "intpack.i" /* required by cell.i, packing.i */ -#include "packing.i" + +#include "buf.i" #include "cache.i" /* required by txn.i */ #include "cell.i" /* required by btree.i */ - #include "log.i" +#include "misc.i" #include "mutex.i" /* required by btree.i */ +#include "packing.i" #include "txn.i" /* required by btree.i */ #include "btree.i" /* required by cursor.i */ diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index e41073299a8..1132b54f335 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -132,7 +132,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) WT_ERR(__wt_verbose(session, WT_VERB_LOG, "log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32, log->log_fh->name, min_lsn->l.file, min_lsn->l.offset)); - WT_ERR(__wt_fsync(session, log->log_fh)); + WT_ERR(__wt_fsync(session, log->log_fh, true)); log->sync_lsn = *min_lsn; WT_STAT_FAST_CONN_INCR(session, log_sync); WT_ERR(__wt_cond_signal(session, log->log_sync_cond)); @@ -641,7 +641,7 @@ __log_file_header( /* * Make sure the header gets to disk. 
*/ - WT_ERR(__wt_fsync(session, tmp.slot_fh)); + WT_ERR(__wt_fsync(session, tmp.slot_fh, true)); if (end_lsn != NULL) *end_lsn = tmp.slot_end_lsn; @@ -655,7 +655,7 @@ err: __wt_scr_free(session, &buf); */ static int __log_openfile(WT_SESSION_IMPL *session, - bool ok_create, WT_FH **fh, const char *file_prefix, uint32_t id) + bool ok_create, WT_FH **fhp, const char *file_prefix, uint32_t id) { WT_DECL_ITEM(buf); WT_DECL_RET; @@ -673,8 +673,8 @@ __log_openfile(WT_SESSION_IMPL *session, WT_ERR(__log_filename(session, id, file_prefix, buf)); WT_ERR(__wt_verbose(session, WT_VERB_LOG, "opening log %s", (const char *)buf->data)); - WT_ERR(__wt_open( - session, buf->data, ok_create, false, WT_FILE_TYPE_LOG, fh)); + WT_ERR(__wt_open(session, buf->data, + WT_FILE_TYPE_LOG, ok_create ? WT_OPEN_CREATE : 0, fhp)); /* * If we are not creating the log file but opening it for reading, * check that the magic number and versions are correct. @@ -682,7 +682,7 @@ __log_openfile(WT_SESSION_IMPL *session, if (!ok_create) { WT_ERR(__wt_buf_grow(session, buf, allocsize)); memset(buf->mem, 0, allocsize); - WT_ERR(__wt_read(session, *fh, 0, allocsize, buf->mem)); + WT_ERR(__wt_read(session, *fhp, 0, allocsize, buf->mem)); logrec = (WT_LOG_RECORD *)buf->mem; __wt_log_record_byteswap(logrec); desc = (WT_LOG_DESC *)logrec->record; @@ -690,7 +690,7 @@ __log_openfile(WT_SESSION_IMPL *session, if (desc->log_magic != WT_LOG_MAGIC) WT_PANIC_RET(session, WT_ERROR, "log file %s corrupted: Bad magic number %" PRIu32, - (*fh)->name, desc->log_magic); + (*fhp)->name, desc->log_magic); if (desc->majorv > WT_LOG_MAJOR_VERSION || (desc->majorv == WT_LOG_MAJOR_VERSION && desc->minorv > WT_LOG_MINOR_VERSION)) @@ -724,8 +724,7 @@ __log_alloc_prealloc(WT_SESSION_IMPL *session, uint32_t to_num) * If there are no pre-allocated files, return WT_NOTFOUND. 
*/ logfiles = NULL; - WT_ERR(__log_get_files(session, - WT_LOG_PREPNAME, &logfiles, &logcount)); + WT_ERR(__log_get_files(session, WT_LOG_PREPNAME, &logfiles, &logcount)); if (logcount == 0) return (WT_NOTFOUND); @@ -850,7 +849,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) * the LSNs since we're the only write in progress. */ if (conn_open) { - WT_RET(__wt_fsync(session, log->log_fh)); + WT_RET(__wt_fsync(session, log->log_fh, true)); log->sync_lsn = end_lsn; log->write_lsn = end_lsn; log->write_start_lsn = end_lsn; @@ -946,7 +945,7 @@ __log_truncate(WT_SESSION_IMPL *session, WT_ERR(__log_openfile(session, false, &log_fh, file_prefix, lsn->l.file)); WT_ERR(__wt_ftruncate(session, log_fh, lsn->l.offset)); - WT_ERR(__wt_fsync(session, log_fh)); + WT_ERR(__wt_fsync(session, log_fh, true)); WT_ERR(__wt_close(session, &log_fh)); /* @@ -955,8 +954,7 @@ __log_truncate(WT_SESSION_IMPL *session, */ if (this_log) goto err; - WT_ERR(__log_get_files(session, - WT_LOG_FILENAME, &logfiles, &logcount)); + WT_ERR(__log_get_files(session, WT_LOG_FILENAME, &logfiles, &logcount)); for (i = 0; i < logcount; i++) { WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum)); if (lognum > lsn->l.file && @@ -969,7 +967,7 @@ __log_truncate(WT_SESSION_IMPL *session, */ WT_ERR(__wt_ftruncate(session, log_fh, WT_LOG_FIRST_RECORD)); - WT_ERR(__wt_fsync(session, log_fh)); + WT_ERR(__wt_fsync(session, log_fh, true)); WT_ERR(__wt_close(session, &log_fh)); } } @@ -1019,7 +1017,7 @@ __wt_log_allocfile( WT_ERR(__log_file_header(session, log_fh, NULL, true)); WT_ERR(__wt_ftruncate(session, log_fh, WT_LOG_FIRST_RECORD)); WT_ERR(__log_prealloc(session, log_fh)); - WT_ERR(__wt_fsync(session, log_fh)); + WT_ERR(__wt_fsync(session, log_fh, true)); WT_ERR(__wt_close(session, &log_fh)); WT_ERR(__wt_verbose(session, WT_VERB_LOG, "log_prealloc: rename %s to %s", @@ -1086,7 +1084,7 @@ __wt_log_open(WT_SESSION_IMPL *session) WT_RET(__wt_verbose(session, WT_VERB_LOG, "log_open: 
open fh to directory %s", conn->log_path)); WT_RET(__wt_open(session, conn->log_path, - false, false, WT_FILE_TYPE_DIRECTORY, &log->log_dir_fh)); + WT_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh)); } if (!F_ISSET(conn, WT_CONN_READONLY)) { @@ -1175,14 +1173,14 @@ __wt_log_close(WT_SESSION_IMPL *session) WT_RET(__wt_verbose(session, WT_VERB_LOG, "closing old log %s", log->log_close_fh->name)); if (!F_ISSET(conn, WT_CONN_READONLY)) - WT_RET(__wt_fsync(session, log->log_close_fh)); + WT_RET(__wt_fsync(session, log->log_close_fh, true)); WT_RET(__wt_close(session, &log->log_close_fh)); } if (log->log_fh != NULL) { WT_RET(__wt_verbose(session, WT_VERB_LOG, "closing log %s", log->log_fh->name)); if (!F_ISSET(conn, WT_CONN_READONLY)) - WT_RET(__wt_fsync(session, log->log_fh)); + WT_RET(__wt_fsync(session, log->log_fh, true)); WT_RET(__wt_close(session, &log->log_fh)); log->log_fh = NULL; } @@ -1206,18 +1204,18 @@ __wt_log_close(WT_SESSION_IMPL *session) * file is zeroes. */ static int -__log_has_hole(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, bool *hole) +__log_has_hole(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t log_size, wt_off_t offset, bool *hole) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_LOG *log; - wt_off_t log_size, off, remainder; + wt_off_t off, remainder; size_t bufsz, rdlen; char *buf, *zerobuf; conn = S2C(session); log = conn->log; - log_size = fh->size; remainder = log_size - offset; *hole = false; @@ -1415,7 +1413,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep) log->log_fh->name, sync_lsn.l.file, sync_lsn.l.offset)); WT_STAT_FAST_CONN_INCR(session, log_sync); - WT_ERR(__wt_fsync(session, log->log_fh)); + WT_ERR(__wt_fsync(session, log->log_fh, true)); log->sync_lsn = sync_lsn; WT_ERR(__wt_cond_signal(session, log->log_sync_cond)); } @@ -1561,7 +1559,8 @@ advance: * See if there is anything non-zero at the * end of this log file. 
*/ - WT_ERR(__log_has_hole(session, log_fh, + WT_ERR(__log_has_hole( + session, log_fh, log_size, rd_lsn.l.offset, &partial_record)); /* * If we read the last record, go to the next file. @@ -1625,7 +1624,7 @@ advance: */ if (reclen == 0) { WT_ERR(__log_has_hole( - session, log_fh, rd_lsn.l.offset, &eol)); + session, log_fh, log_size, rd_lsn.l.offset, &eol)); if (eol) /* Found a hole. This LSN is the end. */ break; diff --git a/src/third_party/wiredtiger/src/log/log_auto.c b/src/third_party/wiredtiger/src/log/log_auto.c index 54df01d01ab..d4dab4e1a33 100644 --- a/src/third_party/wiredtiger/src/log/log_auto.c +++ b/src/third_party/wiredtiger/src/log/log_auto.c @@ -131,9 +131,8 @@ __wt_logop_col_put_unpack( } int -__wt_logop_col_put_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, - FILE *out, uint32_t flags) +__wt_logop_col_put_print(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { WT_DECL_RET; uint32_t fileid; @@ -145,17 +144,18 @@ __wt_logop_col_put_print( WT_RET(__wt_logop_col_put_unpack( session, pp, end, &fileid, &recno, &value)); - WT_RET(__wt_fprintf(out, " \"optype\": \"col_put\",\n")); - WT_ERR(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), + " \"optype\": \"col_put\",\n")); + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"fileid\": \"%" PRIu32 "\",\n", fileid)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"recno\": \"%" PRIu64 "\",\n", recno)); WT_ERR(__logrec_make_json_str(session, &escaped, &value)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"value\": \"%s\"", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &value)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), ",\n \"value-hex\": \"%s\"", escaped)); } @@ -204,9 +204,8 @@ __wt_logop_col_remove_unpack( } int -__wt_logop_col_remove_print( - WT_SESSION_IMPL 
*session, const uint8_t **pp, const uint8_t *end, - FILE *out, uint32_t flags) +__wt_logop_col_remove_print(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { uint32_t fileid; uint64_t recno; @@ -215,10 +214,11 @@ __wt_logop_col_remove_print( WT_RET(__wt_logop_col_remove_unpack( session, pp, end, &fileid, &recno)); - WT_RET(__wt_fprintf(out, " \"optype\": \"col_remove\",\n")); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), + " \"optype\": \"col_remove\",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"fileid\": \"%" PRIu32 "\",\n", fileid)); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"recno\": \"%" PRIu64 "\"", recno)); return (0); } @@ -264,9 +264,8 @@ __wt_logop_col_truncate_unpack( } int -__wt_logop_col_truncate_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, - FILE *out, uint32_t flags) +__wt_logop_col_truncate_print(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { uint32_t fileid; uint64_t start; @@ -276,12 +275,13 @@ __wt_logop_col_truncate_print( WT_RET(__wt_logop_col_truncate_unpack( session, pp, end, &fileid, &start, &stop)); - WT_RET(__wt_fprintf(out, " \"optype\": \"col_truncate\",\n")); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), + " \"optype\": \"col_truncate\",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"fileid\": \"%" PRIu32 "\",\n", fileid)); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"start\": \"%" PRIu64 "\",\n", start)); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"stop\": \"%" PRIu64 "\"", stop)); return (0); } @@ -327,9 +327,8 @@ __wt_logop_row_put_unpack( } int -__wt_logop_row_put_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, - FILE *out, uint32_t flags) +__wt_logop_row_put_print(WT_SESSION_IMPL *session, + const 
uint8_t **pp, const uint8_t *end, uint32_t flags) { WT_DECL_RET; uint32_t fileid; @@ -341,23 +340,24 @@ __wt_logop_row_put_print( WT_RET(__wt_logop_row_put_unpack( session, pp, end, &fileid, &key, &value)); - WT_RET(__wt_fprintf(out, " \"optype\": \"row_put\",\n")); - WT_ERR(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), + " \"optype\": \"row_put\",\n")); + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"fileid\": \"%" PRIu32 "\",\n", fileid)); WT_ERR(__logrec_make_json_str(session, &escaped, &key)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"key\": \"%s\",\n", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &key)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"key-hex\": \"%s\",\n", escaped)); } WT_ERR(__logrec_make_json_str(session, &escaped, &value)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"value\": \"%s\"", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &value)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), ",\n \"value-hex\": \"%s\"", escaped)); } @@ -406,9 +406,8 @@ __wt_logop_row_remove_unpack( } int -__wt_logop_row_remove_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, - FILE *out, uint32_t flags) +__wt_logop_row_remove_print(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { WT_DECL_RET; uint32_t fileid; @@ -419,15 +418,16 @@ __wt_logop_row_remove_print( WT_RET(__wt_logop_row_remove_unpack( session, pp, end, &fileid, &key)); - WT_RET(__wt_fprintf(out, " \"optype\": \"row_remove\",\n")); - WT_ERR(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), + " \"optype\": \"row_remove\",\n")); + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"fileid\": \"%" PRIu32 "\",\n", fileid)); 
WT_ERR(__logrec_make_json_str(session, &escaped, &key)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"key\": \"%s\"", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &key)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), ",\n \"key-hex\": \"%s\"", escaped)); } @@ -476,9 +476,8 @@ __wt_logop_row_truncate_unpack( } int -__wt_logop_row_truncate_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, - FILE *out, uint32_t flags) +__wt_logop_row_truncate_print(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { WT_DECL_RET; uint32_t fileid; @@ -491,26 +490,27 @@ __wt_logop_row_truncate_print( WT_RET(__wt_logop_row_truncate_unpack( session, pp, end, &fileid, &start, &stop, &mode)); - WT_RET(__wt_fprintf(out, " \"optype\": \"row_truncate\",\n")); - WT_ERR(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), + " \"optype\": \"row_truncate\",\n")); + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"fileid\": \"%" PRIu32 "\",\n", fileid)); WT_ERR(__logrec_make_json_str(session, &escaped, &start)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"start\": \"%s\",\n", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &start)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"start-hex\": \"%s\",\n", escaped)); } WT_ERR(__logrec_make_json_str(session, &escaped, &stop)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"stop\": \"%s\",\n", escaped)); if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { WT_ERR(__logrec_make_hex_str(session, &escaped, &stop)); - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"stop-hex\": \"%s\",\n", escaped)); } - WT_ERR(__wt_fprintf(out, + WT_ERR(__wt_fprintf(session, WT_STDOUT(session), " \"mode\": \"%" 
PRIu32 "\"", mode)); err: __wt_free(session, escaped); @@ -518,9 +518,8 @@ err: __wt_free(session, escaped); } int -__wt_txn_op_printlog( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, - FILE *out, uint32_t flags) +__wt_txn_op_printlog(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { uint32_t optype, opsize; @@ -530,33 +529,27 @@ __wt_txn_op_printlog( switch (optype) { case WT_LOGOP_COL_PUT: - WT_RET(__wt_logop_col_put_print(session, pp, end, out, - flags)); + WT_RET(__wt_logop_col_put_print(session, pp, end, flags)); break; case WT_LOGOP_COL_REMOVE: - WT_RET(__wt_logop_col_remove_print(session, pp, end, out, - flags)); + WT_RET(__wt_logop_col_remove_print(session, pp, end, flags)); break; case WT_LOGOP_COL_TRUNCATE: - WT_RET(__wt_logop_col_truncate_print(session, pp, end, out, - flags)); + WT_RET(__wt_logop_col_truncate_print(session, pp, end, flags)); break; case WT_LOGOP_ROW_PUT: - WT_RET(__wt_logop_row_put_print(session, pp, end, out, - flags)); + WT_RET(__wt_logop_row_put_print(session, pp, end, flags)); break; case WT_LOGOP_ROW_REMOVE: - WT_RET(__wt_logop_row_remove_print(session, pp, end, out, - flags)); + WT_RET(__wt_logop_row_remove_print(session, pp, end, flags)); break; case WT_LOGOP_ROW_TRUNCATE: - WT_RET(__wt_logop_row_truncate_print(session, pp, end, out, - flags)); + WT_RET(__wt_logop_row_truncate_print(session, pp, end, flags)); break; WT_ILLEGAL_VALUE(session); diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c index 570d1c9ce48..b7efb1d9018 100644 --- a/src/third_party/wiredtiger/src/log/log_slot.c +++ b/src/third_party/wiredtiger/src/log/log_slot.c @@ -96,15 +96,13 @@ retry: slot->slot_end_lsn = slot->slot_start_lsn; end_offset = WT_LOG_SLOT_JOINED_BUFFERED(old_state) + slot->slot_unbuffered; - slot->slot_end_lsn.l.offset += end_offset; - WT_STAT_FAST_CONN_INCRV(session, - log_slot_consolidated, end_offset); + 
slot->slot_end_lsn.l.offset += (uint32_t)end_offset; + WT_STAT_FAST_CONN_INCRV(session, log_slot_consolidated, end_offset); /* * XXX Would like to change so one piece of code advances the LSN. */ log->alloc_lsn = slot->slot_end_lsn; - WT_ASSERT(session, - log->alloc_lsn.l.file >= log->write_lsn.l.file); + WT_ASSERT(session, log->alloc_lsn.l.file >= log->write_lsn.l.file); return (0); } diff --git a/src/third_party/wiredtiger/src/meta/meta_table.c b/src/third_party/wiredtiger/src/meta/meta_table.c index e5f2727b5b6..dd65f1a7ef9 100644 --- a/src/third_party/wiredtiger/src/meta/meta_table.c +++ b/src/third_party/wiredtiger/src/meta/meta_table.c @@ -290,6 +290,10 @@ __wt_metadata_search(WT_SESSION_IMPL *session, const char *key, char **valuep) WT_ERR(cursor->get_value(cursor, &value)); WT_ERR(__wt_strdup(session, value, valuep)); + err: WT_TRET(__wt_metadata_cursor_release(session, &cursor)); + + if (ret != 0) + __wt_free(session, *valuep); return (ret); } diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c index 471bb65cac0..0b287c228e5 100644 --- a/src/third_party/wiredtiger/src/meta/meta_turtle.c +++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c @@ -68,27 +68,27 @@ __metadata_init(WT_SESSION_IMPL *session) static int __metadata_load_hot_backup(WT_SESSION_IMPL *session) { - FILE *fp; WT_DECL_ITEM(key); WT_DECL_ITEM(value); WT_DECL_RET; + WT_FH *fh; bool exist; /* Look for a hot backup file: if we find it, load it. */ WT_RET(__wt_exist(session, WT_METADATA_BACKUP, &exist)); if (!exist) return (0); - WT_RET(__wt_fopen(session, - WT_METADATA_BACKUP, WT_FHANDLE_READ, 0, &fp)); + WT_RET(__wt_open(session, WT_METADATA_BACKUP, + WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY | WT_STREAM_READ, &fh)); /* Read line pairs and load them into the metadata file. 
*/ WT_ERR(__wt_scr_alloc(session, 512, &key)); WT_ERR(__wt_scr_alloc(session, 512, &value)); for (;;) { - WT_ERR(__wt_getline(session, key, fp)); + WT_ERR(__wt_getline(session, key, fh)); if (key->size == 0) break; - WT_ERR(__wt_getline(session, value, fp)); + WT_ERR(__wt_getline(session, value, fh)); if (value->size == 0) WT_ERR(__wt_illegal_value(session, WT_METADATA_BACKUP)); WT_ERR(__wt_metadata_update(session, key->data, value->data)); @@ -96,7 +96,7 @@ __metadata_load_hot_backup(WT_SESSION_IMPL *session) F_SET(S2C(session), WT_CONN_WAS_BACKUP); -err: WT_TRET(__wt_fclose(&fp, WT_FHANDLE_READ)); +err: WT_TRET(__wt_close(session, &fh)); __wt_scr_free(session, &key); __wt_scr_free(session, &value); return (ret); @@ -240,9 +240,9 @@ err: __wt_free(session, metaconf); int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) { - FILE *fp; WT_DECL_ITEM(buf); WT_DECL_RET; + WT_FH *fh; bool exist, match; *valuep = NULL; @@ -257,20 +257,20 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) if (!exist) return (strcmp(key, WT_METAFILE_URI) == 0 ? __metadata_config(session, valuep) : WT_NOTFOUND); - WT_RET(__wt_fopen(session, - WT_METADATA_TURTLE, WT_FHANDLE_READ, 0, &fp)); + WT_RET(__wt_open(session, WT_METADATA_TURTLE, + WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY | WT_STREAM_READ, &fh)); /* Search for the key. */ WT_ERR(__wt_scr_alloc(session, 512, &buf)); for (match = false;;) { - WT_ERR(__wt_getline(session, buf, fp)); + WT_ERR(__wt_getline(session, buf, fh)); if (buf->size == 0) WT_ERR(WT_NOTFOUND); if (strcmp(key, buf->data) == 0) match = true; /* Key matched: read the subsequent line for the value. */ - WT_ERR(__wt_getline(session, buf, fp)); + WT_ERR(__wt_getline(session, buf, fh)); if (buf->size == 0) WT_ERR(__wt_illegal_value(session, WT_METADATA_TURTLE)); if (match) @@ -280,8 +280,11 @@ __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep) /* Copy the value for the caller. 
*/ WT_ERR(__wt_strdup(session, buf->data, valuep)); -err: WT_TRET(__wt_fclose(&fp, WT_FHANDLE_READ)); +err: WT_TRET(__wt_close(session, &fh)); __wt_scr_free(session, &buf); + + if (ret != 0) + __wt_free(session, *valuep); return (ret); } @@ -304,8 +307,8 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) * Create the turtle setup file: we currently re-write it from scratch * every time. */ - WT_RET(__wt_open(session, - WT_METADATA_TURTLE_SET, true, true, WT_FILE_TYPE_TURTLE, &fh)); + WT_RET(__wt_open(session, WT_METADATA_TURTLE_SET, + WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &fh)); version = wiredtiger_version(&vmajor, &vminor, &vpatch); WT_ERR(__wt_scr_alloc(session, 2 * 1024, &buf)); @@ -317,7 +320,7 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) WT_ERR(__wt_write(session, fh, 0, buf->size, buf->data)); /* Flush the handle and rename the file into place. */ - ret = __wt_fh_sync_and_rename( + ret = __wt_sync_handle_and_rename( session, &fh, WT_METADATA_TURTLE_SET, WT_METADATA_TURTLE); /* Close any file handle left open, remove any temporary file. */ diff --git a/src/third_party/wiredtiger/src/support/filename.c b/src/third_party/wiredtiger/src/os_common/filename.c index 215f5b47997..dfd67284948 100644 --- a/src/third_party/wiredtiger/src/support/filename.c +++ b/src/third_party/wiredtiger/src/os_common/filename.c @@ -24,8 +24,6 @@ __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path) * Build a file name in a scratch buffer. If the name is already an * absolute path duplicate it, otherwise generate a path relative to the * connection home directory. - * Needs to work with a NULL session handle - since this is called via - * the exists API which is used by the test utilities. 
*/ int __wt_nfilename( @@ -36,6 +34,10 @@ __wt_nfilename( *path = NULL; + /* + * Needs to work with a NULL session handle - since this is called via + * the exists API which is used by the test utilities. + */ if (session == NULL || __wt_absolute_path(name)) WT_RET(__wt_strndup(session, name, namelen, path)); else { @@ -103,11 +105,11 @@ __wt_rename_and_sync_directory( } /* - * __wt_fh_sync_and_rename -- - * Sync and close a file, and swap it into place. + * __wt_sync_handle_and_rename -- + * Sync and close a handle, and swap it into place. */ int -__wt_fh_sync_and_rename( +__wt_sync_handle_and_rename( WT_SESSION_IMPL *session, WT_FH **fhp, const char *from, const char *to) { WT_DECL_RET; @@ -117,7 +119,7 @@ __wt_fh_sync_and_rename( *fhp = NULL; /* Flush to disk and close the handle. */ - ret = __wt_fsync(session, fh); + ret = __wt_fsync(session, fh, true); WT_TRET(__wt_close(session, &fh)); WT_RET(ret); @@ -125,20 +127,67 @@ __wt_fh_sync_and_rename( } /* - * __wt_sync_fp_and_rename -- - * Sync and close a file, and swap it into place. + * __wt_copy_and_sync -- + * Copy a file safely; here to support the wt utility. */ int -__wt_sync_fp_and_rename( - WT_SESSION_IMPL *session, FILE **fpp, const char *from, const char *to) +__wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to) { - FILE *fp; + WT_DECL_ITEM(tmp); + WT_DECL_RET; + WT_FH *ffh, *tfh; + WT_SESSION_IMPL *session; + wt_off_t n, offset, size; + char *buf; - fp = *fpp; - *fpp = NULL; + session = (WT_SESSION_IMPL *)wt_session; + ffh = tfh = NULL; + buf = NULL; - /* Flush to disk and close the handle. */ - WT_RET(__wt_fclose(&fp, WT_FHANDLE_WRITE)); + /* + * Remove the target file if it exists, then create a temporary file, + * copy the original into it and rename it into place. I don't think + * its necessary to remove the file, or create a copy and do a rename, + * it's likely safe to overwrite the backup file directly. 
I'm doing + * the remove and rename to insulate us from errors in other programs + * that might not detect a corrupted backup file; it's cheap insurance + * in a path where undetected failure is very bad. + */ + WT_ERR(__wt_scr_alloc(session, 0, &tmp)); + WT_ERR(__wt_buf_fmt(session, tmp, "%s.copy", to)); - return (__wt_rename_and_sync_directory(session, from, to)); + WT_ERR(__wt_remove_if_exists(session, to)); + WT_ERR(__wt_remove_if_exists(session, tmp->data)); + + /* Open the from and temporary file handles. */ + WT_ERR(__wt_open(session, from, + WT_FILE_TYPE_REGULAR, WT_OPEN_READONLY, &ffh)); + WT_ERR(__wt_open(session, tmp->data, + WT_FILE_TYPE_REGULAR, WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &tfh)); + + /* + * Allocate a copy buffer. Don't use a scratch buffer, this thing is + * big, and we don't want it hanging around. + */ +#define WT_BACKUP_COPY_SIZE (128 * 1024) + WT_ERR(__wt_malloc(session, WT_BACKUP_COPY_SIZE, &buf)); + + /* Get the file's size, then copy the bytes. */ + WT_ERR(__wt_filesize(session, ffh, &size)); + for (offset = 0; size > 0; size -= n, offset += n) { + n = WT_MIN(size, WT_BACKUP_COPY_SIZE); + WT_ERR(__wt_read(session, ffh, offset, (size_t)n, buf)); + WT_ERR(__wt_write(session, tfh, offset, (size_t)n, buf)); + } + + /* Close the from handle, then swap the temporary file into place. 
*/
+	WT_ERR(__wt_close(session, &ffh));
+	ret = __wt_sync_handle_and_rename(session, &tfh, tmp->data, to);
+
+err:	WT_TRET(__wt_close(session, &ffh));
+	WT_TRET(__wt_close(session, &tfh));
+
+	__wt_free(session, buf);
+	__wt_scr_free(session, &tmp);
+	return (ret);
 }
diff --git a/src/third_party/wiredtiger/src/os_posix/os_abort.c b/src/third_party/wiredtiger/src/os_common/os_abort.c
index 034eedcfbf8..034eedcfbf8 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_abort.c
+++ b/src/third_party/wiredtiger/src/os_common/os_abort.c
diff --git a/src/third_party/wiredtiger/src/os_posix/os_alloc.c b/src/third_party/wiredtiger/src/os_common/os_alloc.c
index cfc7b80450e..cfc7b80450e 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_alloc.c
+++ b/src/third_party/wiredtiger/src/os_common/os_alloc.c
diff --git a/src/third_party/wiredtiger/src/os_common/os_fhandle.c b/src/third_party/wiredtiger/src/os_common/os_fhandle.c
new file mode 100644
index 00000000000..b16b2e24bfa
--- /dev/null
+++ b/src/third_party/wiredtiger/src/os_common/os_fhandle.c
@@ -0,0 +1,306 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ *	All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __wt_handle_search --
+ *	Search for a matching handle.
+ *
+ * Returns true if a handle named "name" is already on the connection's hash
+ * list. On a match the handle is returned in fhp (when fhp is not NULL) and
+ * its reference count is bumped when increment_ref is set. When there is no
+ * match and newfh is not NULL, newfh is linked onto the list instead (and
+ * returned in fhp); the function still returns false in that case. The whole
+ * lookup-and-insert runs under the connection's fh_lock.
+ */
+bool
+__wt_handle_search(WT_SESSION_IMPL *session,
+    const char *name, bool increment_ref, WT_FH *newfh, WT_FH **fhp)
+{
+	WT_CONNECTION_IMPL *conn;
+	WT_FH *fh;
+	uint64_t bucket, hash;
+	bool found;
+
+	if (fhp != NULL)
+		*fhp = NULL;
+
+	conn = S2C(session);
+	found = false;
+
+	hash = __wt_hash_city64(name, strlen(name));
+	bucket = hash % WT_HASH_ARRAY_SIZE;
+
+	__wt_spin_lock(session, &conn->fh_lock);
+
+	/*
+	 * If we already have the file open, optionally increment the reference
+	 * count and return a pointer.
+	 */
+	TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
+		if (strcmp(name, fh->name) == 0) {
+			if (increment_ref)
+				++fh->ref;
+			if (fhp != NULL)
+				*fhp = fh;
+			found = true;
+			break;
+		}
+
+	/* If we don't find a match, optionally add a new entry. */
+	if (!found && newfh != NULL) {
+		/* The hash is cached so close can find the bucket again. */
+		newfh->name_hash = hash;
+		WT_CONN_FILE_INSERT(conn, newfh, bucket);
+		(void)__wt_atomic_add32(&conn->open_file_count, 1);
+
+		if (increment_ref)
+			++newfh->ref;
+		if (fhp != NULL)
+			*fhp = newfh;
+	}
+
+	__wt_spin_unlock(session, &conn->fh_lock);
+
+	return (found);
+}
+
+/*
+ * __open_verbose --
+ *	Optionally output a verbose message on handle open.
+ *
+ * Without HAVE_VERBOSE this is a no-op returning 0. Otherwise, when file
+ * operation tracing is enabled, the message names the file type and lists
+ * the open flags that are set.
+ */
+static inline int
+__open_verbose(WT_SESSION_IMPL *session,
+    const char *name, uint32_t file_type, uint32_t flags)
+{
+#ifdef HAVE_VERBOSE
+	WT_DECL_RET;
+	WT_DECL_ITEM(tmp);
+	const char *file_type_tag, *sep;
+
+	if (!WT_VERBOSE_ISSET(session, WT_VERB_FILEOPS))
+		return (0);
+
+	/*
+	 * It's useful to track file opens when debugging platforms, take some
+	 * effort to output good tracking information.
+	 */
+
+	switch (file_type) {
+	case WT_FILE_TYPE_CHECKPOINT:
+		file_type_tag = "checkpoint";
+		break;
+	case WT_FILE_TYPE_DATA:
+		file_type_tag = "data";
+		break;
+	case WT_FILE_TYPE_DIRECTORY:
+		file_type_tag = "directory";
+		break;
+	case WT_FILE_TYPE_LOG:
+		file_type_tag = "log";
+		break;
+	case WT_FILE_TYPE_REGULAR:
+		file_type_tag = "regular";
+		break;
+	default:
+		file_type_tag = "unknown open type";
+		break;
+	}
+
+	WT_RET(__wt_scr_alloc(session, 0, &tmp));
+	sep = " (";
+	/* Append one flag name; the first one opens the parenthesis. */
+#define	WT_OPEN_VERBOSE_FLAG(f, name)					\
+	if (LF_ISSET(f)) {						\
+		WT_ERR(__wt_buf_catfmt(					\
+		    session, tmp, "%s%s", sep, name));			\
+		sep = ", ";						\
+	}
+
+	WT_OPEN_VERBOSE_FLAG(WT_OPEN_CREATE, "create");
+	WT_OPEN_VERBOSE_FLAG(WT_OPEN_EXCLUSIVE, "exclusive");
+	WT_OPEN_VERBOSE_FLAG(WT_OPEN_FIXED, "fixed");
+	WT_OPEN_VERBOSE_FLAG(WT_OPEN_READONLY, "readonly");
+	WT_OPEN_VERBOSE_FLAG(WT_STREAM_APPEND, "stream-append");
+	WT_OPEN_VERBOSE_FLAG(WT_STREAM_READ, "stream-read");
+	WT_OPEN_VERBOSE_FLAG(WT_STREAM_WRITE, "stream-write");
+
+	if (tmp->size != 0)
+		WT_ERR(__wt_buf_catfmt(session, tmp, ")"));
+
+	ret = __wt_verbose(session, WT_VERB_FILEOPS,
+	    "%s: handle-open: type %s%s",
+	    name, file_type_tag, tmp->size == 0 ? "" : (char *)tmp->data);
+
+err:	__wt_scr_free(session, &tmp);
+	return (ret);
+#else
+	WT_UNUSED(session);
+	WT_UNUSED(name);
+	WT_UNUSED(file_type);
+	WT_UNUSED(flags);
+	return (0);
+#endif
+}
+
+/*
+ * __wt_open --
+ *	Open a file handle.
+ *
+ * If a handle with this name is already open it is returned with its
+ * reference count incremented. Otherwise a new handle is allocated, opened
+ * through the connection's handle_open function and linked onto the
+ * connection's list; if another thread linked the same name first, the new
+ * handle is discarded and the existing one is returned.
+ */
+int
+__wt_open(WT_SESSION_IMPL *session,
+    const char *name, uint32_t file_type, uint32_t flags, WT_FH **fhp)
+{
+	WT_CONNECTION_IMPL *conn;
+	WT_DECL_RET;
+	WT_FH *fh;
+	bool lock_file, open_called;
+	char *path;
+
+	WT_ASSERT(session, file_type != 0);	/* A file type is required. */
+
+	conn = S2C(session);
+	fh = NULL;
+	open_called = false;
+	path = NULL;
+
+	WT_RET(__open_verbose(session, name, file_type, flags));
+
+	/* Check if the handle is already open. */
+	if (__wt_handle_search(session, name, true, NULL, &fh)) {
+		/*
+		 * XXX
+		 * The in-memory implementation has to reset the file offset
+		 * when a file is re-opened (which obviously also depends on
+		 * in-memory configurations never opening a file in more than
+		 * one thread at a time). This needs to be fixed.
+		 */
+		if (F_ISSET(fh, WT_FH_IN_MEMORY) && fh->ref == 1)
+			fh->off = 0;
+		*fhp = fh;
+		return (0);
+	}
+
+	/* Allocate a structure and set the name. */
+	WT_ERR(__wt_calloc_one(session, &fh));
+	WT_ERR(__wt_strdup(session, name, &fh->name));
+
+	/*
+	 * If this is a read-only connection, open all files read-only except
+	 * the lock file.
+	 *
+	 * The only file created in read-only mode is the lock file.
+	 */
+	if (F_ISSET(conn, WT_CONN_READONLY)) {
+		lock_file = strcmp(name, WT_SINGLETHREAD) == 0;
+		if (!lock_file)
+			LF_SET(WT_OPEN_READONLY);
+		WT_ASSERT(session, lock_file || !LF_ISSET(WT_OPEN_CREATE));
+	}
+
+	/* Create the path to the file. */
+	if (!LF_ISSET(WT_OPEN_FIXED))
+		WT_ERR(__wt_filename(session, name, &path));
+
+	/* Call the underlying open function. */
+	WT_ERR(conn->handle_open(
+	    session, fh, path == NULL ? name : path, file_type, flags));
+	open_called = true;
+
+	/*
+	 * Repeat the check for a match: if there's no match, link our newly
+	 * created handle onto the database's list of files.
+	 *
+	 * The error label shares the discard path used when we lose the race:
+	 * in both cases the handle allocated above must be torn down.
+	 */
+	if (__wt_handle_search(session, name, true, fh, fhp)) {
+err:		if (open_called)
+			WT_TRET(fh->fh_close(session, fh));
+		if (fh != NULL) {
+			__wt_free(session, fh->name);
+			__wt_free(session, fh);
+		}
+	}
+
+	__wt_free(session, path);
+	return (ret);
+}
+
+/*
+ * __wt_close --
+ *	Close a file handle.
+ *
+ * Releases one reference and clears the caller's pointer. The underlying
+ * resources are discarded only when the last reference goes away and the
+ * handle isn't an in-memory object. A NULL handle is a no-op.
+ */
+int
+__wt_close(WT_SESSION_IMPL *session, WT_FH **fhp)
+{
+	WT_CONNECTION_IMPL *conn;
+	WT_DECL_RET;
+	WT_FH *fh;
+	uint64_t bucket;
+
+	conn = S2C(session);
+
+	if (*fhp == NULL)
+		return (0);
+	fh = *fhp;
+	*fhp = NULL;
+
+	/*
+	 * Track handle-close as a file operation, so open and close match.
+	 *
+	 * The caller's pointer is already cleared, so a failure to output the
+	 * message must not return early: the reference would never be
+	 * released. Remember the error and carry on with the close.
+	 */
+	ret = __wt_verbose(
+	    session, WT_VERB_FILEOPS, "%s: handle-close", fh->name);
+
+	/*
+	 * If the reference count hasn't gone to 0, or if it's an in-memory
+	 * object, we're done.
+	 *
+	 * Assert the reference count is correct, but don't let it wrap.
+	 */
+	__wt_spin_lock(session, &conn->fh_lock);
+	WT_ASSERT(session, fh->ref > 0);
+	if ((fh->ref > 0 && --fh->ref > 0) || F_ISSET(fh, WT_FH_IN_MEMORY)) {
+		__wt_spin_unlock(session, &conn->fh_lock);
+		return (ret);
+	}
+
+	/* Remove from the list. */
+	bucket = fh->name_hash % WT_HASH_ARRAY_SIZE;
+	WT_CONN_FILE_REMOVE(conn, fh, bucket);
+	(void)__wt_atomic_sub32(&conn->open_file_count, 1);
+
+	__wt_spin_unlock(session, &conn->fh_lock);
+
+	/* Discard underlying resources. */
+	WT_TRET(fh->fh_close(session, fh));
+
+	__wt_free(session, fh->name);
+	__wt_free(session, fh);
+
+	return (ret);
+}
+
+/*
+ * __wt_close_connection_close --
+ *	Close any open file handles at connection close.
+ *
+ * Any handle still on the connection's list is force-closed; EBUSY is
+ * returned unless this is an in-memory configuration and the handle's
+ * reference count is zero.
+ */
+int
+__wt_close_connection_close(WT_SESSION_IMPL *session)
+{
+	WT_DECL_RET;
+	WT_FH *fh;
+	WT_CONNECTION_IMPL *conn;
+
+	conn = S2C(session);
+
+	while ((fh = TAILQ_FIRST(&conn->fhqh)) != NULL) {
+		/*
+		 * In-memory configurations will have open files, but the ref
+		 * counts should be zero.
+		 */
+		if (!F_ISSET(conn, WT_CONN_IN_MEMORY) || fh->ref != 0) {
+			ret = EBUSY;
+			__wt_errx(session,
+			    "Connection has open file handles: %s", fh->name);
+		}
+
+		/*
+		 * Force the next close to be the last one: a single reference
+		 * and no in-memory flag, so the handle is really discarded.
+		 */
+		fh->ref = 1;
+		F_CLR(fh, WT_FH_IN_MEMORY);
+
+		WT_TRET(__wt_close(session, &fh));
+	}
+	return (ret);
+}
diff --git a/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c b/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c
new file mode 100644
index 00000000000..260514eac66
--- /dev/null
+++ b/src/third_party/wiredtiger/src/os_common/os_fs_inmemory.c
@@ -0,0 +1,482 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ *	All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+static int __im_handle_size(WT_SESSION_IMPL *, WT_FH *, wt_off_t *);
+
+/*
+ * In-memory information.
+ */
+typedef struct {
+	WT_SPINLOCK lock;		/* Serializes in-memory file I/O */
+} WT_IM;
+
+/*
+ * __im_directory_list --
+ *	Get a list of files from a directory, in-memory version; not
+ *	supported, always fails with ENOTSUP.
+ */
+static int
+__im_directory_list(WT_SESSION_IMPL *session, const char *dir,
+    const char *prefix, uint32_t flags, char ***dirlist, u_int *countp)
+{
+	WT_UNUSED(session);
+	WT_UNUSED(dir);
+	WT_UNUSED(prefix);
+	WT_UNUSED(flags);
+	WT_UNUSED(dirlist);
+	WT_UNUSED(countp);
+
+	WT_RET_MSG(session, ENOTSUP, "directory-list");
+}
+
+/*
+ * __im_directory_sync --
+ *	Flush a directory to ensure file creation is durable; a no-op for
+ *	in-memory configurations.
+ */
+static int
+__im_directory_sync(WT_SESSION_IMPL *session, const char *path)
+{
+	WT_UNUSED(session);
+	WT_UNUSED(path);
+	return (0);
+}
+
+/*
+ * __im_file_exist --
+ *	Return if the file exists, that is, if a handle with that name is on
+ *	the connection's handle list.
+ */
+static int
+__im_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
+{
+	*existp = __wt_handle_search(session, name, false, NULL, NULL);
+	return (0);
+}
+
+/*
+ * __im_file_remove --
+ *	POSIX remove. Removing a name that doesn't exist is not an error.
+ */
+static int
+__im_file_remove(WT_SESSION_IMPL *session, const char *name)
+{
+	WT_DECL_RET;
+	WT_FH *fh;
+
+	if (__wt_handle_search(session, name, true, NULL, &fh)) {
+		/* The search took the only reference: nobody has it open. */
+		WT_ASSERT(session, fh->ref == 1);
+
+		/* Force a discard of the handle. */
+		F_CLR(fh, WT_FH_IN_MEMORY);
+		ret = __wt_close(session, &fh);
+	}
+	return (ret);
+}
+
+/*
+ * __im_file_rename --
+ *	POSIX rename.
+ *
+ * Fails with EPERM if a handle named "to" already exists and with ENOENT if
+ * there is no handle named "from". The handle is moved between hash buckets
+ * under the connection's fh_lock.
+ */
+static int
+__im_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
+{
+	WT_CONNECTION_IMPL *conn;
+	WT_DECL_RET;
+	WT_FH *fh;
+	uint64_t bucket, hash;
+	char *to_name;
+
+	conn = S2C(session);
+
+	/* We'll need a copy of the target name. */
+	WT_RET(__wt_strdup(session, to, &to_name));
+
+	__wt_spin_lock(session, &conn->fh_lock);
+
+	/* Make sure the target name isn't active. */
+	hash = __wt_hash_city64(to, strlen(to));
+	bucket = hash % WT_HASH_ARRAY_SIZE;
+	TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
+		if (strcmp(to, fh->name) == 0)
+			WT_ERR(EPERM);
+
+	/* Find the source name. */
+	hash = __wt_hash_city64(from, strlen(from));
+	bucket = hash % WT_HASH_ARRAY_SIZE;
+	TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq)
+		if (strcmp(from, fh->name) == 0)
+			break;
+	if (fh == NULL)
+		WT_ERR(ENOENT);
+
+	/* Remove source from the list. */
+	WT_CONN_FILE_REMOVE(conn, fh, bucket);
+
+	/* Swap the names. */
+	__wt_free(session, fh->name);
+	fh->name = to_name;
+	to_name = NULL;
+
+	/*
+	 * Put source back on the list.
+	 *
+	 * The handle caches the hash of its name and __wt_close uses that
+	 * cached value to choose the bucket it unlinks from: update it, or the
+	 * eventual close removes the handle from the wrong bucket.
+	 */
+	hash = __wt_hash_city64(to, strlen(to));
+	bucket = hash % WT_HASH_ARRAY_SIZE;
+	fh->name_hash = hash;
+	WT_CONN_FILE_INSERT(conn, fh, bucket);
+
+	if (0) {
+err:		__wt_free(session, to_name);
+	}
+	__wt_spin_unlock(session, &conn->fh_lock);
+
+	return (ret);
+}
+
+/*
+ * __im_file_size --
+ *	Get the size of a file in bytes, by file name. Returns ENOENT if no
+ *	handle with that name exists.
+ */
+static int
+__im_file_size(
+    WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep)
+{
+	WT_DECL_RET;
+	WT_FH *fh;
+	WT_IM *im;
+
+	WT_UNUSED(silent);
+
+	im = S2C(session)->inmemory;
+	__wt_spin_lock(session, &im->lock);
+
+	if (__wt_handle_search(session, name, true, NULL, &fh)) {
+		/*
+		 * The search took a reference, always give it back, even if
+		 * getting the size fails.
+		 */
+		ret = __im_handle_size(session, fh, sizep);
+		WT_TRET(__wt_close(session, &fh));
+	} else
+		ret = ENOENT;
+
+	__wt_spin_unlock(session, &im->lock);
+	return (ret);
+}
+
+/*
+ * __im_handle_advise --
+ *	POSIX fadvise; not supported, returns ENOTSUP without a message.
+ */
+static int
+__im_handle_advise(WT_SESSION_IMPL *session,
+    WT_FH *fh, wt_off_t offset, wt_off_t len, int advice)
+{
+	WT_UNUSED(session);
+	WT_UNUSED(fh);
+	WT_UNUSED(offset);
+	WT_UNUSED(len);
+	WT_UNUSED(advice);
+	return (ENOTSUP);
+}
+
+/*
+ * __im_handle_close --
+ *	ANSI C close/fclose; releases the buffer holding the file's contents.
+ */
+static int
+__im_handle_close(WT_SESSION_IMPL *session, WT_FH *fh)
+{
+	__wt_buf_free(session, &fh->buf);
+
+	return (0);
+}
+
+/*
+ * __im_handle_getc --
+ *	ANSI C fgetc.
+ */
+static int
+__im_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp)
+{
+	WT_IM *im;
+
+	im = S2C(session)->inmemory;
+	__wt_spin_lock(session, &im->lock);
+
+	/*
+	 * Return the byte as an unsigned value, the way fgetc does: reading
+	 * through a plain char would turn bytes of 0x80 and above into
+	 * negative numbers, and 0xff into something equal to EOF.
+	 */
+	if (fh->off >= fh->buf.size)
+		*chp = EOF;
+	else
+		*chp = ((const uint8_t *)fh->buf.data)[fh->off++];
+
+	__wt_spin_unlock(session, &im->lock);
+	return (0);
+}
+
+/*
+ * __im_handle_lock --
+ *	Lock/unlock a file; a no-op for in-memory files.
+ */
+static int
+__im_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
+{
+	WT_UNUSED(session);
+	WT_UNUSED(fh);
+	WT_UNUSED(lock);
+	return (0);
+}
+
+/*
+ * __im_handle_printf --
+ *	ANSI C vfprintf.
+ *
+ * The output is formatted into a scratch buffer, then copied into the
+ * handle's buffer at the handle's current offset, which is advanced past
+ * the new data.
+ */
+static int
+__im_handle_printf(
+    WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap)
+{
+	va_list ap_copy;
+	WT_DECL_ITEM(tmp);
+	WT_DECL_RET;
+	WT_IM *im;
+	size_t len;
+	int n;
+	bool locked;
+
+	im = S2C(session)->inmemory;
+	locked = false;
+
+	/* Build the string we're writing. */
+	WT_RET(__wt_scr_alloc(session, strlen(fmt) * 2 + 128, &tmp));
+	for (;;) {
+		va_copy(ap_copy, ap);
+		n = vsnprintf(tmp->mem, tmp->memsize, fmt, ap_copy);
+		va_end(ap_copy);
+
+		/* A negative return is an error, not a length. */
+		if (n < 0)
+			WT_ERR_MSG(session, EIO,
+			    "%s: handle-printf: vsnprintf", fh->name);
+		len = (size_t)n;
+		if (len < tmp->memsize) {
+			tmp->data = tmp->mem;
+			tmp->size = len;
+			break;
+		}
+		WT_ERR(__wt_buf_extend(session, tmp, len + 1));
+	}
+
+	/*
+	 * Errors above jump to the cleanup code before the lock is taken,
+	 * track whether there's anything to unlock.
+	 */
+	__wt_spin_lock(session, &im->lock);
+	locked = true;
+
+	/* Grow the handle's buffer as necessary. */
+	WT_ERR(__wt_buf_grow(session, &fh->buf, fh->off + len));
+
+	/* Copy the data into place and update the offset. */
+	memcpy((uint8_t *)fh->buf.mem + fh->off, tmp->data, len);
+	fh->off += len;
+
+	/*
+	 * Extend the file's data length the same way a write does: getc stops
+	 * at buf.size, so without this the formatted data can't be read back.
+	 */
+	if (fh->off > fh->buf.size)
+		fh->buf.size = fh->off;
+
+err:	if (locked)
+		__wt_spin_unlock(session, &im->lock);
+
+	__wt_scr_free(session, &tmp);
+	return (ret);
+}
+
+/*
+ * __im_handle_read --
+ *	POSIX pread.
+ */
+static int
+__im_handle_read(
+    WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
+{
+	WT_DECL_RET;
+	WT_IM *im;
+	size_t off;
+
+	im = S2C(session)->inmemory;
+	__wt_spin_lock(session, &im->lock);
+
+	/*
+	 * A read starting inside the file but running past its end is
+	 * shortened to the bytes available and still succeeds; only a read
+	 * starting at or past the end of the data fails.
+	 */
+	off = (size_t)offset;
+	if (off < fh->buf.size) {
+		len = WT_MIN(len, fh->buf.size - off);
+		memcpy(buf, (uint8_t *)fh->buf.mem + off, len);
+		fh->off = off + len;
+	} else
+		ret = WT_ERROR;
+
+	__wt_spin_unlock(session, &im->lock);
+	if (ret == 0)
+		return (0);
+	WT_RET_MSG(session, WT_ERROR,
+	    "%s: handle-read: failed to read %" WT_SIZET_FMT " bytes at "
+	    "offset %" WT_SIZET_FMT,
+	    fh->name, len, off);
+}
+
+/*
+ * __im_handle_size --
+ *	Get the size of a file in bytes, by file handle.
+ */
+static int
+__im_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
+{
+	WT_UNUSED(session);
+
+	/*
+	 * XXX hack - MongoDB assumes that any file with content will have a
+	 * non-zero size. In memory tables generally are zero-sized, make
+	 * MongoDB happy.
+	 */
+	*sizep = fh->buf.size == 0 ? 1024 : (wt_off_t)fh->buf.size;
+	return (0);
+}
+
+/*
+ * __im_handle_sync --
+ *	POSIX fflush/fsync; there is nothing to flush for an in-memory file.
+ */
+static int
+__im_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
+{
+	WT_UNUSED(session);
+	WT_UNUSED(fh);
+
+	/*
+	 * Callers attempting asynchronous flush handle ENOTSUP returns, and
+	 * won't make further attempts.
+	 */
+	return (block ? 0 : ENOTSUP);
+}
+
+/*
+ * __im_handle_truncate --
+ *	POSIX ftruncate; sets the file's data length to the given offset.
+ */
+static int
+__im_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset)
+{
+	WT_DECL_RET;
+	WT_IM *im;
+	size_t off;
+
+	im = S2C(session)->inmemory;
+	__wt_spin_lock(session, &im->lock);
+
+	/*
+	 * Grow the buffer as necessary, clear any new space in the file,
+	 * and reset the file's data length.
+	 */
+	off = (size_t)offset;
+	WT_ERR(__wt_buf_grow(session, &fh->buf, off));
+	if (fh->buf.size < off)
+		memset((uint8_t *)
+		    fh->buf.data + fh->buf.size, 0, off - fh->buf.size);
+	fh->buf.size = off;
+
+err:	__wt_spin_unlock(session, &im->lock);
+	return (ret);
+}
+
+/*
+ * __im_handle_write --
+ *	POSIX pwrite.
+ */
+static int
+__im_handle_write(WT_SESSION_IMPL *session,
+    WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
+{
+	WT_DECL_RET;
+	WT_IM *im;
+	size_t off;
+
+	im = S2C(session)->inmemory;
+	__wt_spin_lock(session, &im->lock);
+
+	/* Ask for 1024 bytes beyond the end of this write. */
+	off = (size_t)offset;
+	WT_ERR(__wt_buf_grow(session, &fh->buf, off + len + 1024));
+
+	/* The data length only ever grows here, truncate shrinks it. */
+	memcpy((uint8_t *)fh->buf.data + off, buf, len);
+	if (off + len > fh->buf.size)
+		fh->buf.size = off + len;
+	fh->off = off + len;
+
+err:	__wt_spin_unlock(session, &im->lock);
+	if (ret == 0)
+		return (0);
+	WT_RET_MSG(session, ret,
+	    "%s: handle-write: failed to write %" WT_SIZET_FMT " bytes at "
+	    "offset %" WT_SIZET_FMT,
+	    fh->name, len, off);
+}
+
+/*
+ * __im_handle_open --
+ *	POSIX fopen/open; flags the handle as in-memory and installs the
+ *	in-memory handle functions.
+ */
+static int
+__im_handle_open(WT_SESSION_IMPL *session,
+    WT_FH *fh, const char *path, uint32_t file_type, uint32_t flags)
+{
+	WT_UNUSED(session);
+	WT_UNUSED(path);
+	WT_UNUSED(file_type);
+	WT_UNUSED(flags);
+
+	fh->off = 0;
+	F_SET(fh, WT_FH_IN_MEMORY);
+
+	fh->fh_advise = __im_handle_advise;
+	fh->fh_close = __im_handle_close;
+	fh->fh_getc = __im_handle_getc;
+	fh->fh_lock = __im_handle_lock;
+	fh->fh_printf = __im_handle_printf;
+	fh->fh_read = __im_handle_read;
+	fh->fh_size = __im_handle_size;
+	fh->fh_sync = __im_handle_sync;
+	fh->fh_truncate = __im_handle_truncate;
+	fh->fh_write = __im_handle_write;
+
+	return (0);
+}
+
+/*
+ * __wt_os_inmemory --
+ *	Initialize an in-memory configuration.
+ */
+int
+__wt_os_inmemory(WT_SESSION_IMPL *session)
+{
+	WT_CONNECTION_IMPL *conn;
+	WT_DECL_RET;
+	WT_IM *im;
+
+	conn = S2C(session);
+	im = NULL;
+
+	/* Initialize the in-memory jump table. */
+	conn->file_directory_list = __im_directory_list;
+	conn->file_directory_sync = __im_directory_sync;
+	conn->file_exist = __im_file_exist;
+	conn->file_remove = __im_file_remove;
+	conn->file_rename = __im_file_rename;
+	conn->file_size = __im_file_size;
+	conn->handle_open = __im_handle_open;
+
+	/* Allocate an in-memory structure. */
+	WT_RET(__wt_calloc_one(session, &im));
+	WT_ERR(__wt_spin_init(session, &im->lock, "in-memory I/O"));
+	conn->inmemory = im;
+
+	return (0);
+
+err:	__wt_free(session, im);
+	return (ret);
+}
+
+/*
+ * __wt_os_inmemory_cleanup --
+ *	Discard an in-memory configuration; does nothing if none was set up.
+ */
+int
+__wt_os_inmemory_cleanup(WT_SESSION_IMPL *session)
+{
+	WT_DECL_RET;
+	WT_IM *im;
+
+	if ((im = S2C(session)->inmemory) == NULL)
+		return (0);
+	S2C(session)->inmemory = NULL;
+
+	__wt_spin_destroy(session, &im->lock);
+	__wt_free(session, im);
+
+	return (ret);
+}
diff --git a/src/third_party/wiredtiger/src/os_common/os_fs_stdio.c b/src/third_party/wiredtiger/src/os_common/os_fs_stdio.c
new file mode 100644
index 00000000000..9baba9b6945
--- /dev/null
+++ b/src/third_party/wiredtiger/src/os_common/os_fs_stdio.c
@@ -0,0 +1,239 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ *	All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __stdio_handle_advise --
+ *	POSIX fadvise; not supported on stdio handles (ENOTSUP).
+ */
+static int
+__stdio_handle_advise(WT_SESSION_IMPL *session,
+    WT_FH *fh, wt_off_t offset, wt_off_t len, int advice)
+{
+	WT_UNUSED(offset);
+	WT_UNUSED(len);
+	WT_UNUSED(advice);
+	WT_RET_MSG(session, ENOTSUP, "%s: handle-advise", fh->name);
+}
+
+/*
+ * __stdio_handle_allocate --
+ *	POSIX fallocate.
+ */
+static int
+__stdio_handle_allocate(
+    WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len)
+{
+	WT_UNUSED(offset);
+	WT_UNUSED(len);
+	WT_RET_MSG(session, ENOTSUP, "%s: handle-allocate", fh->name);
+}
+
+/*
+ * __stdio_handle_close --
+ *	ANSI C close/fclose; not supported on stdio handles (ENOTSUP), the
+ *	underlying stream is not closed here.
+ */
+static int
+__stdio_handle_close(WT_SESSION_IMPL *session, WT_FH *fh)
+{
+	WT_RET_MSG(session, ENOTSUP, "%s: handle-close", fh->name);
+}
+
+/*
+ * __stdio_handle_getc --
+ *	ANSI C fgetc; not supported on stdio handles (ENOTSUP).
+ */
+static int
+__stdio_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp)
+{
+	WT_UNUSED(chp);
+	WT_RET_MSG(session, ENOTSUP, "%s: handle-getc", fh->name);
+}
+
+/*
+ * __stdio_handle_lock --
+ *	Lock/unlock a file; not supported on stdio handles (ENOTSUP).
+ */
+static int
+__stdio_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
+{
+	WT_UNUSED(lock);
+	WT_RET_MSG(session, ENOTSUP, "%s: handle-lock", fh->name);
+}
+
+/*
+ * __stdio_handle_map --
+ *	Map a file; not supported on stdio handles (ENOTSUP).
+ */
+static int
+__stdio_handle_map(WT_SESSION_IMPL *session,
+    WT_FH *fh, void *p, size_t *lenp, void **mappingcookie)
+{
+	WT_UNUSED(p);
+	WT_UNUSED(lenp);
+	WT_UNUSED(mappingcookie);
+	WT_RET_MSG(session, ENOTSUP, "%s: handle-map", fh->name);
+}
+
+/*
+ * __stdio_handle_map_discard --
+ *	Discard a section of a mapped region; not supported on stdio handles
+ *	(ENOTSUP).
+ */
+static int
+__stdio_handle_map_discard(
+    WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t len)
+{
+	WT_UNUSED(p);
+	WT_UNUSED(len);
+	WT_RET_MSG(session, ENOTSUP, "%s: handle-map-discard", fh->name);
+}
+
+/*
+ * __stdio_handle_map_preload --
+ *	Preload a section of a mapped region; not supported on stdio handles
+ *	(ENOTSUP).
+ */
+static int
+__stdio_handle_map_preload(
+    WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t len)
+{
+	WT_UNUSED(p);
+	WT_UNUSED(len);
+	WT_RET_MSG(session, ENOTSUP, "%s: handle-map-preload", fh->name);
+}
+
+/*
+ * __stdio_handle_map_unmap --
+ *	Unmap a file.
+ */
+static int
+__stdio_handle_map_unmap(WT_SESSION_IMPL *session,
+    WT_FH *fh, void *p, size_t len, void **mappingcookie)
+{
+	WT_UNUSED(p);
+	WT_UNUSED(len);
+	WT_UNUSED(mappingcookie);
+	WT_RET_MSG(session, ENOTSUP, "%s: handle-map-unmap", fh->name);
+}
+
+/*
+ * __stdio_handle_printf --
+ *	ANSI C vfprintf; writes to the handle's stream, a negative vfprintf
+ *	return is reported as EIO.
+ */
+static int
+__stdio_handle_printf(
+    WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap)
+{
+	if (vfprintf(fh->fp, fmt, ap) >= 0)
+		return (0);
+	WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name);
+}
+
+/*
+ * __stdio_handle_read --
+ *	POSIX pread; not supported on stdio handles (ENOTSUP).
+ */
+static int
+__stdio_handle_read(
+    WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
+{
+	WT_UNUSED(offset);
+	WT_UNUSED(len);
+	WT_UNUSED(buf);
+	WT_RET_MSG(session, ENOTSUP, "%s: handle-read", fh->name);
+}
+
+/*
+ * __stdio_handle_size --
+ *	Get the size of a file in bytes, by file handle; not supported on
+ *	stdio handles (ENOTSUP).
+ */
+static int
+__stdio_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
+{
+	WT_UNUSED(sizep);
+	WT_RET_MSG(session, ENOTSUP, "%s: handle-size", fh->name);
+}
+
+/*
+ * __stdio_handle_sync --
+ *	POSIX fflush/fsync; only an fflush of the stream is done, and the
+ *	block argument is ignored.
+ */
+static int
+__stdio_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
+{
+	WT_UNUSED(block);
+
+	if (fflush(fh->fp) == 0)
+		return (0);
+	WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name);
+}
+
+/*
+ * __stdio_handle_truncate --
+ *	POSIX ftruncate; not supported on stdio handles (ENOTSUP).
+ */
+static int
+__stdio_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
+{
+	WT_UNUSED(len);
+	WT_RET_MSG(session, ENOTSUP, "%s: handle-truncate", fh->name);
+}
+
+/*
+ * __stdio_handle_write --
+ *	POSIX pwrite; not supported on stdio handles (ENOTSUP).
+ */
+static int
+__stdio_handle_write(WT_SESSION_IMPL *session,
+    WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
+{
+	WT_UNUSED(offset);
+	WT_UNUSED(len);
+	WT_UNUSED(buf);
+	WT_RET_MSG(session, ENOTSUP, "%s: handle-write", fh->name);
+}
+
+/*
+ * __stdio_func_init --
+ *	Initialize stdio functions.
+ */
+static void
+__stdio_func_init(WT_FH *fh, const char *name, FILE *fp)
+{
+	/* The name is referenced, not copied. */
+	fh->name = name;
+	fh->fp = fp;
+
+	/* Only printf and sync do real work, the rest report ENOTSUP. */
+	fh->fh_advise = __stdio_handle_advise;
+	fh->fh_allocate = __stdio_handle_allocate;
+	fh->fh_close = __stdio_handle_close;
+	fh->fh_getc = __stdio_handle_getc;
+	fh->fh_lock = __stdio_handle_lock;
+	fh->fh_map = __stdio_handle_map;
+	fh->fh_map_discard = __stdio_handle_map_discard;
+	fh->fh_map_preload = __stdio_handle_map_preload;
+	fh->fh_map_unmap = __stdio_handle_map_unmap;
+	fh->fh_printf = __stdio_handle_printf;
+	fh->fh_read = __stdio_handle_read;
+	fh->fh_size = __stdio_handle_size;
+	fh->fh_sync = __stdio_handle_sync;
+	fh->fh_truncate = __stdio_handle_truncate;
+	fh->fh_write = __stdio_handle_write;
+}
+
+/*
+ * __wt_os_stdio --
+ *	Initialize the stdio configuration: set up the session's stderr and
+ *	stdout handles.
+ */
+int
+__wt_os_stdio(WT_SESSION_IMPL *session)
+{
+	__stdio_func_init(WT_STDERR(session), "stderr", stderr);
+	__stdio_func_init(WT_STDOUT(session), "stdout", stdout);
+
+	return (0);
+}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_getline.c b/src/third_party/wiredtiger/src/os_common/os_getline.c
index c0ca96852de..01e11581edf 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_getline.c
+++ b/src/third_party/wiredtiger/src/os_common/os_getline.c
@@ -20,7 +20,7 @@ * (so the caller's EOF marker is a returned line length of 0). */ int -__wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, FILE *fp) +__wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_FH *fh) { int c;
@@ -30,7 +30,11 @@ __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, FILE *fp) */ WT_RET(__wt_buf_init(session, buf, 100)); - while ((c = fgetc(fp)) != EOF) { + for (;;) { + WT_RET(fh->fh_getc(session, fh, &c)); + if (c == EOF) + break; + /* Leave space for a trailing NUL. 
*/ + WT_RET(__wt_buf_extend(session, buf, buf->size + 2)); if (c == '\n') {
@@ -40,8 +44,6 @@ __wt_getline(WT_SESSION_IMPL *session, WT_ITEM *buf, FILE *fp) } ((char *)buf->mem)[buf->size++] = (char)c; } - if (c == EOF && ferror(fp)) - WT_RET_MSG(session, __wt_errno(), "file read"); ((char *)buf->mem)[buf->size] = '\0';
diff --git a/src/third_party/wiredtiger/src/os_posix/os_getopt.c b/src/third_party/wiredtiger/src/os_common/os_getopt.c
index 0306ad1d79d..0306ad1d79d 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_getopt.c
+++ b/src/third_party/wiredtiger/src/os_common/os_getopt.c
diff --git a/src/third_party/wiredtiger/src/os_common/os_init.c b/src/third_party/wiredtiger/src/os_common/os_init.c
new file mode 100644
index 00000000000..512216c52a5
--- /dev/null
+++ b/src/third_party/wiredtiger/src/os_common/os_init.c
@@ -0,0 +1,41 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ *	All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __wt_os_init --
+ *	Initialize the OS layer: the in-memory implementation if the
+ *	connection is configured in-memory, otherwise the Windows or POSIX
+ *	one, chosen at compile time.
+ */
+int
+__wt_os_init(WT_SESSION_IMPL *session)
+{
+	return (F_ISSET(S2C(session), WT_CONN_IN_MEMORY) ?
+	    __wt_os_inmemory(session) :
+#if defined(_MSC_VER)
+	    __wt_os_win(session));
+#else
+	    __wt_os_posix(session));
+#endif
+}
+
+/*
+ * __wt_os_cleanup --
+ *	Clean up the OS layer, using the same selection as __wt_os_init.
+ */
+int
+__wt_os_cleanup(WT_SESSION_IMPL *session)
+{
+	return (F_ISSET(S2C(session), WT_CONN_IN_MEMORY) ?
+	    __wt_os_inmemory_cleanup(session) :
+#if defined(_MSC_VER)
+	    __wt_os_win_cleanup(session));
+#else
+	    __wt_os_posix_cleanup(session));
+#endif
+}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_strtouq.c b/src/third_party/wiredtiger/src/os_common/os_strtouq.c
index 0ae604fc761..0ae604fc761 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_strtouq.c
+++ b/src/third_party/wiredtiger/src/os_common/os_strtouq.c
diff --git a/src/third_party/wiredtiger/src/os_posix/os_dir.c b/src/third_party/wiredtiger/src/os_posix/os_dir.c
index 83e77aa5312..78ae5f8edd4 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_dir.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_dir.c
@@ -11,13 +11,12 @@ #include <dirent.h> /* - * __wt_dirlist -- - * Get a list of files from a directory, optionally filtered by - * a given prefix. + * __wt_posix_directory_list -- + * Get a list of files from a directory, POSIX version. */ int -__wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, - uint32_t flags, char ***dirlist, u_int *countp) +__wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, + const char *prefix, uint32_t flags, char ***dirlist, u_int *countp) { struct dirent *dp; DIR *dirp;
@@ -36,24 +35,20 @@ __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, dirallocsz = 0; dirsz = 0; entries = NULL; - if (flags == 0) - LF_SET(WT_DIRLIST_INCLUDE); - - WT_ERR(__wt_verbose(session, WT_VERB_FILEOPS, - "wt_dirlist of %s %s prefix %s", - path, LF_ISSET(WT_DIRLIST_INCLUDE) ? "include" : "exclude", - prefix == NULL ? "all" : prefix)); WT_SYSCALL_RETRY(((dirp = opendir(path)) == NULL ? 1 : 0), ret); if (ret != 0) - WT_ERR_MSG(session, ret, "%s: opendir", path); - for (dirsz = 0, count = 0; (dp = readdir(dirp)) != NULL;) { + WT_ERR_MSG(session, ret, "%s: directory-list: opendir", path); + + for (count = 0; (dp = readdir(dirp)) != NULL;) { /* * Skip . and .. 
*/ if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0) continue; + + /* The list of files is optionally filtered by a prefix. */ match = false; if (prefix != NULL && ((LF_ISSET(WT_DIRLIST_INCLUDE) && @@ -78,8 +73,8 @@ __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, if (count > 0) *dirlist = entries; *countp = count; -err: - if (dirp != NULL) + +err: if (dirp != NULL) (void)closedir(dirp); __wt_free(session, path); @@ -91,5 +86,7 @@ err: __wt_free(session, entries[count]); __wt_free(session, entries); } - WT_RET_MSG(session, ret, "dirlist %s prefix %s", dir, prefix); + WT_RET_MSG(session, ret, + "%s: directory-list, prefix \"%s\"", + dir, prefix == NULL ? "" : prefix); } diff --git a/src/third_party/wiredtiger/src/os_posix/os_exist.c b/src/third_party/wiredtiger/src/os_posix/os_exist.c deleted file mode 100644 index 87f0e219d2e..00000000000 --- a/src/third_party/wiredtiger/src/os_posix/os_exist.c +++ /dev/null @@ -1,38 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_exist -- - * Return if the file exists. 
- */ -int -__wt_exist(WT_SESSION_IMPL *session, const char *filename, bool *existp) -{ - struct stat sb; - WT_DECL_RET; - char *path; - - *existp = false; - - WT_RET(__wt_filename(session, filename, &path)); - - WT_SYSCALL_RETRY(stat(path, &sb), ret); - - __wt_free(session, path); - - if (ret == 0) { - *existp = true; - return (0); - } - if (ret == ENOENT) - return (0); - - WT_RET_MSG(session, ret, "%s: fstat", filename); -} diff --git a/src/third_party/wiredtiger/src/os_posix/os_fallocate.c b/src/third_party/wiredtiger/src/os_posix/os_fallocate.c index bf20a99bdef..22879d36182 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_fallocate.c +++ b/src/third_party/wiredtiger/src/os_posix/os_fallocate.c @@ -13,11 +13,11 @@ #include <sys/syscall.h> #endif /* - * __wt_fallocate_config -- - * Configure file-extension behavior for a file handle. + * __wt_posix_handle_allocate_configure -- + * Configure POSIX file-extension behavior for a file handle. */ void -__wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh) +__wt_posix_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh) { WT_UNUSED(session); @@ -40,11 +40,11 @@ __wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh) } /* - * __wt_std_fallocate -- + * __posix_std_fallocate -- * Linux fallocate call. */ static int -__wt_std_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) +__posix_std_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) { #if defined(HAVE_FALLOCATE) WT_DECL_RET; @@ -60,11 +60,11 @@ __wt_std_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) } /* - * __wt_sys_fallocate -- + * __posix_sys_fallocate -- * Linux fallocate call (system call version). 
*/ static int -__wt_sys_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) +__posix_sys_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) { #if defined(__linux__) && defined(SYS_fallocate) WT_DECL_RET; @@ -86,11 +86,11 @@ __wt_sys_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) } /* - * __wt_posix_fallocate -- + * __posix_posix_fallocate -- * POSIX fallocate call. */ static int -__wt_posix_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) +__posix_posix_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) { #if defined(HAVE_POSIX_FALLOCATE) WT_DECL_RET; @@ -106,36 +106,29 @@ __wt_posix_fallocate(WT_FH *fh, wt_off_t offset, wt_off_t len) } /* - * __wt_fallocate -- - * Extend a file. + * __wt_posix_handle_allocate -- + * POSIX fallocate. */ int -__wt_fallocate( +__wt_posix_handle_allocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) { WT_DECL_RET; - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); switch (fh->fallocate_available) { /* * Check for already configured handles and make the configured call. 
*/ case WT_FALLOCATE_POSIX: - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "%s: posix_fallocate", fh->name)); - if ((ret = __wt_posix_fallocate(fh, offset, len)) == 0) + if ((ret = __posix_posix_fallocate(fh, offset, len)) == 0) return (0); WT_RET_MSG(session, ret, "%s: posix_fallocate", fh->name); case WT_FALLOCATE_STD: - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "%s: fallocate", fh->name)); - if ((ret = __wt_std_fallocate(fh, offset, len)) == 0) + if ((ret = __posix_std_fallocate(fh, offset, len)) == 0) return (0); WT_RET_MSG(session, ret, "%s: fallocate", fh->name); case WT_FALLOCATE_SYS: - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "%s: sys_fallocate", fh->name)); - if ((ret = __wt_sys_fallocate(fh, offset, len)) == 0) + if ((ret = __posix_sys_fallocate(fh, offset, len)) == 0) return (0); WT_RET_MSG(session, ret, "%s: sys_fallocate", fh->name); @@ -152,17 +145,17 @@ __wt_fallocate( * fallocate (and the system call version of fallocate) first to * avoid locking on Linux if at all possible. 
*/ - if ((ret = __wt_std_fallocate(fh, offset, len)) == 0) { + if ((ret = __posix_std_fallocate(fh, offset, len)) == 0) { fh->fallocate_available = WT_FALLOCATE_STD; fh->fallocate_requires_locking = false; return (0); } - if ((ret = __wt_sys_fallocate(fh, offset, len)) == 0) { + if ((ret = __posix_sys_fallocate(fh, offset, len)) == 0) { fh->fallocate_available = WT_FALLOCATE_SYS; fh->fallocate_requires_locking = false; return (0); } - if ((ret = __wt_posix_fallocate(fh, offset, len)) == 0) { + if ((ret = __posix_posix_fallocate(fh, offset, len)) == 0) { fh->fallocate_available = WT_FALLOCATE_POSIX; #if !defined(__linux__) fh->fallocate_requires_locking = false; diff --git a/src/third_party/wiredtiger/src/os_posix/os_filesize.c b/src/third_party/wiredtiger/src/os_posix/os_filesize.c deleted file mode 100644 index 72242e351bf..00000000000 --- a/src/third_party/wiredtiger/src/os_posix/os_filesize.c +++ /dev/null @@ -1,62 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_filesize -- - * Get the size of a file in bytes. - */ -int -__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) -{ - struct stat sb; - WT_DECL_RET; - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: fstat", fh->name)); - - WT_SYSCALL_RETRY(fstat(fh->fd, &sb), ret); - if (ret == 0) { - *sizep = sb.st_size; - return (0); - } - - WT_RET_MSG(session, ret, "%s: fstat", fh->name); -} - -/* - * __wt_filesize_name -- - * Return the size of a file in bytes, given a file name. 
- */ -int -__wt_filesize_name(WT_SESSION_IMPL *session, - const char *filename, bool silent, wt_off_t *sizep) -{ - struct stat sb; - WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, filename, &path)); - - WT_SYSCALL_RETRY(stat(path, &sb), ret); - - __wt_free(session, path); - - if (ret == 0) { - *sizep = sb.st_size; - return (0); - } - - /* - * Some callers of this function expect failure if the file doesn't - * exist, and don't want an error message logged. - */ - if (!silent) - WT_RET_MSG(session, ret, "%s: fstat", filename); - return (ret); -} diff --git a/src/third_party/wiredtiger/src/os_posix/os_flock.c b/src/third_party/wiredtiger/src/os_posix/os_flock.c deleted file mode 100644 index e2056f7636c..00000000000 --- a/src/third_party/wiredtiger/src/os_posix/os_flock.c +++ /dev/null @@ -1,38 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_bytelock -- - * Lock/unlock a byte in a file. - */ -int -__wt_bytelock(WT_FH *fhp, wt_off_t byte, bool lock) -{ - struct flock fl; - WT_DECL_RET; - - /* - * WiredTiger requires this function be able to acquire locks past - * the end of file. - * - * Note we're using fcntl(2) locking: all fcntl locks associated with a - * file for a given process are removed when any file descriptor for the - * file is closed by the process, even if a lock was never requested for - * that file descriptor. - */ - fl.l_start = byte; - fl.l_len = 1; - fl.l_type = lock ? 
F_WRLCK : F_UNLCK; - fl.l_whence = SEEK_SET; - - WT_SYSCALL_RETRY(fcntl(fhp->fd, F_SETLK, &fl), ret); - - return (ret); -} diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c new file mode 100644 index 00000000000..86aa8db8f4f --- /dev/null +++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c @@ -0,0 +1,736 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __posix_sync -- + * Underlying support function to flush a file handle. + */ +static int +__posix_sync(WT_SESSION_IMPL *session, + int fd, const char *name, const char *func, bool block) +{ + WT_DECL_RET; + + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); + +#ifdef HAVE_SYNC_FILE_RANGE + if (!block) { + WT_SYSCALL_RETRY(sync_file_range(fd, + (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: %s: sync_file_range", name, func); + } +#else + /* + * Callers attempting asynchronous flush handle ENOTSUP returns, and + * won't make further attempts. + */ + if (!block) + return (ENOTSUP); +#endif + +#if defined(F_FULLFSYNC) + /* + * OS X fsync documentation: + * "Note that while fsync() will flush all data from the host to the + * drive (i.e. the "permanent storage device"), the drive itself may + * not physically write the data to the platters for quite some time + * and it may be written in an out-of-order sequence. For applications + * that require tighter guarantees about the integrity of their data, + * Mac OS X provides the F_FULLFSYNC fcntl. The F_FULLFSYNC fcntl asks + * the drive to flush all buffered data to permanent storage." + * + * OS X F_FULLFSYNC fcntl documentation: + * "This is currently implemented on HFS, MS-DOS (FAT), and Universal + * Disk Format (UDF) file systems." 
+ */ + WT_SYSCALL_RETRY(fcntl(fd, F_FULLFSYNC, 0), ret); + if (ret == 0) + return (0); + /* + * Assume F_FULLFSYNC failed because the file system doesn't support it + * and fallback to fsync. + */ +#endif +#if defined(HAVE_FDATASYNC) + WT_SYSCALL_RETRY(fdatasync(fd), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: %s: fdatasync", name, func); +#else + WT_SYSCALL_RETRY(fsync(fd), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: %s: fsync", name, func); +#endif +} + +/* + * __posix_directory_sync -- + * Flush a directory to ensure file creation is durable. + */ +static int +__posix_directory_sync(WT_SESSION_IMPL *session, const char *path) +{ +#ifdef __linux__ + WT_DECL_RET; + int fd, tret; + const char *dir; + char *copy; + + tret = 0; + /* + * POSIX 1003.1 does not require that fsync of a file handle ensures the + * entry in the directory containing the file has also reached disk (and + * there are historic Linux filesystems requiring this), do an explicit + * fsync on a file descriptor for the directory to be sure. + */ + copy = NULL; + if (path == NULL || (dir = strrchr(path, '/')) == NULL) + path = S2C(session)->home; + else { + /* + * Copy the directory name, leaving the trailing slash in place, + * so a path of "/foo" doesn't result in an empty string. + */ + WT_RET(__wt_strndup( + session, path, (size_t)(dir - path) + 1, &copy)); + path = copy; + } + + WT_SYSCALL_RETRY(( + (fd = open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret); + if (ret != 0) + WT_ERR_MSG(session, ret, "%s: directory-sync: open", path); + + ret = __posix_sync(session, fd, path, "directory-sync", true); + + WT_SYSCALL_RETRY(close(fd), tret); + if (tret != 0) { + __wt_err(session, tret, "%s: directory-sync: close", path); + if (ret == 0) + ret = tret; + } +err: __wt_free(session, copy); + return (ret); +#else + WT_UNUSED(session); + WT_UNUSED(path); + return (0); +#endif +} + +/* + * __posix_file_exist -- + * Return if the file exists. 
+ */ +static int +__posix_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +{ + struct stat sb; + WT_DECL_RET; + char *path; + + WT_RET(__wt_filename(session, name, &path)); + name = path; + + WT_SYSCALL_RETRY(stat(name, &sb), ret); + if (ret == 0) + *existp = true; + else if (ret == ENOENT) { + *existp = false; + ret = 0; + } else + __wt_err(session, ret, "%s: file-exist: stat", name); + + __wt_free(session, path); + return (ret); +} + +/* + * __posix_file_remove -- + * Remove a file. + */ +static int +__posix_file_remove(WT_SESSION_IMPL *session, const char *name) +{ + WT_DECL_RET; + char *path; + +#ifdef HAVE_DIAGNOSTIC + if (__wt_handle_search(session, name, false, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-remove: file has open handles", name); +#endif + + WT_RET(__wt_filename(session, name, &path)); + name = path; + + WT_SYSCALL_RETRY(remove(name), ret); + if (ret != 0) + __wt_err(session, ret, "%s: file-remove: remove", name); + + __wt_free(session, path); + return (ret); +} + +/* + * __posix_file_rename -- + * Rename a file. 
+ */ +static int +__posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +{ + WT_DECL_RET; + char *from_path, *to_path; + +#ifdef HAVE_DIAGNOSTIC + if (__wt_handle_search(session, from, false, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", from); + if (__wt_handle_search(session, to, false, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", to); +#endif + + from_path = to_path = NULL; + WT_ERR(__wt_filename(session, from, &from_path)); + from = from_path; + WT_ERR(__wt_filename(session, to, &to_path)); + to = to_path; + + WT_SYSCALL_RETRY(rename(from, to), ret); + if (ret != 0) + __wt_err(session, ret, + "%s to %s: file-rename: rename", from, to); + +err: __wt_free(session, from_path); + __wt_free(session, to_path); + return (ret); +} + +/* + * __posix_file_size -- + * Get the size of a file in bytes, by file name. + */ +static int +__posix_file_size( + WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +{ + struct stat sb; + WT_DECL_RET; + char *path; + + WT_RET(__wt_filename(session, name, &path)); + name = path; + + /* + * Optionally don't log errors on ENOENT; some callers of this function + * expect failure in that case and don't want an error message logged. + */ + WT_SYSCALL_RETRY(stat(name, &sb), ret); + if (ret == 0) + *sizep = sb.st_size; + else if (ret != ENOENT || !silent) + __wt_err(session, ret, "%s: file-size: stat", name); + + __wt_free(session, path); + + return (ret); +} + +/* + * __posix_handle_advise -- + * POSIX fadvise. + */ +static int +__posix_handle_advise(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) +{ +#if defined(HAVE_POSIX_FADVISE) + WT_DECL_RET; + + /* + * Refuse pre-load when direct I/O is configured for the file, the + * kernel cache isn't interesting. 
+ */ + if (advice == POSIX_MADV_WILLNEED && fh->direct_io) + return (ENOTSUP); + + WT_SYSCALL_RETRY(posix_fadvise(fh->fd, offset, len, advice), ret); + if (ret == 0) + return (0); + + /* + * Treat EINVAL as not-supported, some systems don't support some flags. + * Quietly fail, callers expect not-supported failures. + */ + if (ret == EINVAL) + return (ENOTSUP); + + WT_RET_MSG(session, ret, "%s: handle-advise: posix_fadvise", fh->name); +#else + WT_UNUSED(session); + WT_UNUSED(fh); + WT_UNUSED(offset); + WT_UNUSED(len); + WT_UNUSED(advice); + + /* Quietly fail, callers expect not-supported failures. */ + return (ENOTSUP); +#endif +} + +/* + * __posix_handle_close -- + * ANSI C close/fclose. + */ +static int +__posix_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_DECL_RET; + + if (fh->fp == NULL) { + WT_SYSCALL_RETRY(close(fh->fd), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: handle-close: close", fh->name); + } + + /* If the stream was opened for writing, flush the file. */ + if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) { + ret = __wt_errno(); + __wt_err(session, ret, "%s: handle-close: fflush", fh->name); + } + + /* Close the file. */ + if (fclose(fh->fp) != 0) { + ret = __wt_errno(); + __wt_err(session, ret, "%s: handle-close: fclose", fh->name); + } + return (ret); +} + +/* + * __posix_handle_getc -- + * ANSI C fgetc. + */ +static int +__posix_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp) +{ + if (fh->fp == NULL) + WT_RET_MSG(session, + ENOTSUP, "%s: handle-getc: no stream configured", fh->name); + + *chp = fgetc(fh->fp); + if (*chp != EOF || !ferror(fh->fp)) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name); +} + +/* + * __posix_handle_lock -- + * Lock/unlock a file. 
+ */ +static int +__posix_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) +{ + struct flock fl; + WT_DECL_RET; + + /* + * WiredTiger requires this function be able to acquire locks past + * the end of file. + * + * Note we're using fcntl(2) locking: all fcntl locks associated with a + * file for a given process are removed when any file descriptor for the + * file is closed by the process, even if a lock was never requested for + * that file descriptor. + */ + fl.l_start = 0; + fl.l_len = 1; + fl.l_type = lock ? F_WRLCK : F_UNLCK; + fl.l_whence = SEEK_SET; + + WT_SYSCALL_RETRY(fcntl(fh->fd, F_SETLK, &fl), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", fh->name); +} + +/* + * __posix_handle_printf -- + * ANSI C vfprintf. + */ +static int +__posix_handle_printf( + WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap) +{ + if (fh->fp == NULL) + WT_RET_MSG(session, ENOTSUP, + "%s: vfprintf: no stream configured", fh->name); + + if (vfprintf(fh->fp, fmt, ap) >= 0) + return (0); + WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name); +} + +/* + * __posix_handle_read -- + * POSIX pread. + */ +static int +__posix_handle_read( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) +{ + size_t chunk; + ssize_t nr; + uint8_t *addr; + + /* Assert direct I/O is aligned and a multiple of the alignment. */ + WT_ASSERT(session, + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + (!((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1)) && + len >= S2C(session)->buffer_alignment && + len % S2C(session)->buffer_alignment == 0)); + + /* Break reads larger than 1GB into 1GB chunks. */ + for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { + chunk = WT_MIN(len, WT_GIGABYTE); + if ((nr = pread(fh->fd, addr, chunk, offset)) <= 0) + WT_RET_MSG(session, nr == 0 ? 
WT_ERROR : __wt_errno(), + "%s: handle-read: pread: failed to read %" + WT_SIZET_FMT " bytes at offset %" PRIuMAX, + fh->name, chunk, (uintmax_t)offset); + } + return (0); +} + +/* + * __posix_handle_size -- + * Get the size of a file in bytes, by file handle. + */ +static int +__posix_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) +{ + struct stat sb; + WT_DECL_RET; + + WT_SYSCALL_RETRY(fstat(fh->fd, &sb), ret); + if (ret == 0) { + *sizep = sb.st_size; + return (0); + } + WT_RET_MSG(session, ret, "%s: handle-size: fstat", fh->name); +} + +/* + * __posix_handle_sync -- + * POSIX fflush/fsync. + */ +static int +__posix_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block) +{ + if (fh->fp == NULL) + return (__posix_sync( + session, fh->fd, fh->name, "handle-sync", block)); + + if (fflush(fh->fp) == 0) + return (0); + WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name); +} + +/* + * __posix_handle_truncate -- + * POSIX ftruncate. + */ +static int +__posix_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) +{ + WT_DECL_RET; + + WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: handle-truncate: ftruncate", fh->name); +} + +/* + * __posix_handle_write -- + * POSIX pwrite. + */ +static int +__posix_handle_write(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, size_t len, const void *buf) +{ + size_t chunk; + ssize_t nw; + const uint8_t *addr; + + /* Assert direct I/O is aligned and a multiple of the alignment. */ + WT_ASSERT(session, + !fh->direct_io || + S2C(session)->buffer_alignment == 0 || + (!((uintptr_t)buf & + (uintptr_t)(S2C(session)->buffer_alignment - 1)) && + len >= S2C(session)->buffer_alignment && + len % S2C(session)->buffer_alignment == 0)); + + /* Break writes larger than 1GB into 1GB chunks. 
*/ + for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { + chunk = WT_MIN(len, WT_GIGABYTE); + if ((nw = pwrite(fh->fd, addr, chunk, offset)) < 0) + WT_RET_MSG(session, __wt_errno(), + "%s: handle-write: pwrite: failed to write %" + WT_SIZET_FMT " bytes at offset %" PRIuMAX, + fh->name, chunk, (uintmax_t)offset); + } + return (0); +} + +/* + * __posix_handle_open_cloexec -- + * Prevent child access to file handles. + */ +static inline int +__posix_handle_open_cloexec(WT_SESSION_IMPL *session, int fd, const char *name) +{ +#if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC) + int f; + + /* + * Security: + * The application may spawn a new process, and we don't want another + * process to have access to our file handles. There's an obvious race + * between the open and this call, prefer the flag to open if available. + */ + if ((f = fcntl(fd, F_GETFD)) == -1 || + fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1) + WT_RET_MSG(session, __wt_errno(), + "%s: handle-open: fcntl", name); + return (0); +#else + WT_UNUSED(session); + WT_UNUSED(fd); + WT_UNUSED(name); + return (0); +#endif +} + +/* + * __posix_handle_open -- + * Open a file handle. + */ +static int +__posix_handle_open(WT_SESSION_IMPL *session, + WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + mode_t mode; + int f, fd, tret; + bool direct_io; + const char *stream_mode; + + conn = S2C(session); + direct_io = false; + + /* Set up error handling. */ + fh->fd = fd = -1; + fh->fp = NULL; + + if (file_type == WT_FILE_TYPE_DIRECTORY) { + f = O_RDONLY; +#ifdef O_CLOEXEC + /* + * Security: + * The application may spawn a new process, and we don't want + * another process to have access to our file handles. + */ + f |= O_CLOEXEC; +#endif + WT_SYSCALL_RETRY(( + (fd = open(name, f, 0444)) == -1 ? 
1 : 0), ret); + if (ret != 0) + WT_ERR_MSG(session, ret, "%s: handle-open: open", name); + WT_ERR(__posix_handle_open_cloexec(session, fd, name)); + goto directory_open; + } + + f = LF_ISSET(WT_OPEN_READONLY) ? O_RDONLY : O_RDWR; + if (LF_ISSET(WT_OPEN_CREATE)) { + f |= O_CREAT; + if (LF_ISSET(WT_OPEN_EXCLUSIVE)) + f |= O_EXCL; + mode = 0666; + } else + mode = 0; + +#ifdef O_BINARY + /* Windows clones: we always want to treat the file as a binary. */ + f |= O_BINARY; +#endif +#ifdef O_CLOEXEC + /* + * Security: + * The application may spawn a new process, and we don't want another + * process to have access to our file handles. + */ + f |= O_CLOEXEC; +#endif +#ifdef O_DIRECT + /* + * Direct I/O: file-type is a flag from the set of possible flags stored + * in the connection handle during configuration, check for a match. + * Also, "direct_io=checkpoint" configures direct I/O for readonly data + * files. + */ + if (FLD_ISSET(conn->direct_io, file_type) || + (LF_ISSET(WT_OPEN_READONLY) && + file_type == WT_FILE_TYPE_DATA && + FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) { + f |= O_DIRECT; + direct_io = true; + } +#endif + fh->direct_io = direct_io; +#ifdef O_NOATIME + /* Avoid updating metadata for read-only workloads. */ + if (file_type == WT_FILE_TYPE_DATA) + f |= O_NOATIME; +#endif + + if (file_type == WT_FILE_TYPE_LOG && + FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) { +#ifdef O_DSYNC + f |= O_DSYNC; +#elif defined(O_SYNC) + f |= O_SYNC; +#else + WT_ERR_MSG(session, ENOTSUP, + "unsupported log sync mode configured"); +#endif + } + + WT_SYSCALL_RETRY(((fd = open(name, f, mode)) == -1 ? 1 : 0), ret); + if (ret != 0) + WT_ERR_MSG(session, ret, + direct_io ? + "%s: handle-open: open: failed with direct I/O configured, " + "some filesystem types do not support direct I/O" : + "%s: handle-open: open", name); + WT_ERR(__posix_handle_open_cloexec(session, fd, name)); + + /* Disable read-ahead on trees: it slows down random read workloads. 
*/ +#if defined(HAVE_POSIX_FADVISE) + if (file_type == WT_FILE_TYPE_DATA) { + WT_SYSCALL_RETRY( + posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM), ret); + if (ret != 0) + WT_ERR_MSG(session, ret, + "%s: handle-open: posix_fadvise", name); + } +#endif + + /* Optionally configure a stdio stream API. */ + switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) { + case WT_STREAM_APPEND: + stream_mode = "a"; + F_SET(fh, WT_FH_FLUSH_ON_CLOSE); + break; + case WT_STREAM_READ: + stream_mode = "r"; + break; + case WT_STREAM_WRITE: + stream_mode = "w"; + F_SET(fh, WT_FH_FLUSH_ON_CLOSE); + break; + case 0: + default: + stream_mode = NULL; + break; + } + if (stream_mode != NULL) { + if ((fh->fp = fdopen(fd, stream_mode)) == NULL) + WT_ERR_MSG(session, __wt_errno(), + "%s: handle-open: fdopen", name); + if (LF_ISSET(WT_STREAM_LINE_BUFFER)) + __wt_stream_set_line_buffer(fh->fp); + } + +directory_open: + fh->fd = fd; + + /* Configure fallocate calls. */ + __wt_posix_handle_allocate_configure(session, fh); + + fh->fh_advise = __posix_handle_advise; + fh->fh_allocate = __wt_posix_handle_allocate; + fh->fh_close = __posix_handle_close; + fh->fh_getc = __posix_handle_getc; + fh->fh_lock = __posix_handle_lock; + fh->fh_map = __wt_posix_map; + fh->fh_map_discard = __wt_posix_map_discard; + fh->fh_map_preload = __wt_posix_map_preload; + fh->fh_map_unmap = __wt_posix_map_unmap; + fh->fh_printf = __posix_handle_printf; + fh->fh_read = __posix_handle_read; + fh->fh_size = __posix_handle_size; + fh->fh_sync = __posix_handle_sync; + fh->fh_truncate = __posix_handle_truncate; + fh->fh_write = __posix_handle_write; + + return (0); + +err: if (fd != -1) { + WT_SYSCALL_RETRY(close(fd), tret); + if (tret != 0) + __wt_err(session, tret, "%s: handle-open: close", name); + } + return (ret); +} + +/* + * __wt_os_posix -- + * Initialize a POSIX configuration. 
+ */ +int +__wt_os_posix(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); + + /* Initialize the POSIX jump table. */ + conn->file_directory_list = __wt_posix_directory_list; + conn->file_directory_sync = __posix_directory_sync; + conn->file_exist = __posix_file_exist; + conn->file_remove = __posix_file_remove; + conn->file_rename = __posix_file_rename; + conn->file_size = __posix_file_size; + conn->handle_open = __posix_handle_open; + + return (0); +} + +/* + * __wt_os_posix_cleanup -- + * Discard a POSIX configuration. + */ +int +__wt_os_posix_cleanup(WT_SESSION_IMPL *session) +{ + WT_UNUSED(session); + + return (0); +} diff --git a/src/third_party/wiredtiger/src/os_posix/os_fsync.c b/src/third_party/wiredtiger/src/os_posix/os_fsync.c deleted file mode 100644 index 0bd0359338b..00000000000 --- a/src/third_party/wiredtiger/src/os_posix/os_fsync.c +++ /dev/null @@ -1,171 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_handle_sync -- - * Flush a file handle. - */ -static int -__wt_handle_sync(int fd) -{ - WT_DECL_RET; - -#if defined(F_FULLFSYNC) - /* - * OS X fsync documentation: - * "Note that while fsync() will flush all data from the host to the - * drive (i.e. the "permanent storage device"), the drive itself may - * not physically write the data to the platters for quite some time - * and it may be written in an out-of-order sequence. For applications - * that require tighter guarantees about the integrity of their data, - * Mac OS X provides the F_FULLFSYNC fcntl. The F_FULLFSYNC fcntl asks - * the drive to flush all buffered data to permanent storage." - * - * OS X F_FULLFSYNC fcntl documentation: - * "This is currently implemented on HFS, MS-DOS (FAT), and Universal - * Disk Format (UDF) file systems." 
- */ - WT_SYSCALL_RETRY(fcntl(fd, F_FULLFSYNC, 0), ret); - if (ret == 0) - return (0); - /* - * Assume F_FULLFSYNC failed because the file system doesn't support it - * and fallback to fsync. - */ -#endif -#if defined(HAVE_FDATASYNC) - WT_SYSCALL_RETRY(fdatasync(fd), ret); -#else - WT_SYSCALL_RETRY(fsync(fd), ret); -#endif - return (ret); -} - -/* - * __wt_directory_sync_fh -- - * Flush a directory file handle. We don't use __wt_fsync because - * most file systems don't require this step and we don't want to - * penalize them by calling fsync. - */ -int -__wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh) -{ -#ifdef __linux__ - WT_DECL_RET; - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - if ((ret = __wt_handle_sync(fh->fd)) == 0) - return (0); - WT_RET_MSG(session, ret, "%s: fsync", fh->name); -#else - WT_UNUSED(session); - WT_UNUSED(fh); - return (0); -#endif -} - -/* - * __wt_directory_sync -- - * Flush a directory to ensure a file creation is durable. - */ -int -__wt_directory_sync(WT_SESSION_IMPL *session, const char *path) -{ -#ifdef __linux__ - WT_DECL_RET; - int fd, tret; - const char *dir; - char *copy; - - /* - * POSIX 1003.1 does not require that fsync of a file handle ensures the - * entry in the directory containing the file has also reached disk (and - * there are historic Linux filesystems requiring this), do an explicit - * fsync on a file descriptor for the directory to be sure. - */ - copy = NULL; - if (path == NULL || (dir = strrchr(path, '/')) == NULL) - path = S2C(session)->home; - else { - /* - * Copy the directory name, leaving the trailing slash in place, - * so a path of "/foo" doesn't result in an empty string. - */ - WT_RET(__wt_strndup( - session, path, (size_t)(dir - path) + 1, &copy)); - path = copy; - } - - WT_SYSCALL_RETRY(((fd = - open(path, O_RDONLY, 0444)) == -1 ? 
1 : 0), ret); - __wt_free(session, copy); - if (ret != 0) - WT_RET_MSG(session, ret, "%s: open", path); - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - if ((ret = __wt_handle_sync(fd)) != 0) - WT_ERR_MSG(session, ret, "%s: fsync", path); - -err: WT_SYSCALL_RETRY(close(fd), tret); - if (tret != 0) - __wt_err(session, tret, "%s: close", path); - WT_TRET(tret); - return (ret); -#else - WT_UNUSED(session); - WT_UNUSED(path); - return (0); -#endif -} - -/* - * __wt_fsync -- - * Flush a file handle. - */ -int -__wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_DECL_RET; - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: fsync", fh->name)); - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) || - WT_STRING_MATCH(fh->name, WT_SINGLETHREAD, - strlen(WT_SINGLETHREAD))); - if ((ret = __wt_handle_sync(fh->fd)) == 0) - return (0); - WT_RET_MSG(session, ret, "%s fsync error", fh->name); -} - -/* - * __wt_fsync_async -- - * Flush a file handle and don't wait for the result. - */ -int -__wt_fsync_async(WT_SESSION_IMPL *session, WT_FH *fh) -{ -#ifdef HAVE_SYNC_FILE_RANGE - WT_DECL_RET; - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "%s: sync_file_range", fh->name)); - - WT_SYSCALL_RETRY(sync_file_range(fh->fd, - (off64_t)0, (off64_t)0, SYNC_FILE_RANGE_WRITE), ret); - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: sync_file_range", fh->name); -#else - WT_UNUSED(session); - WT_UNUSED(fh); - return (0); -#endif -} diff --git a/src/third_party/wiredtiger/src/os_posix/os_ftruncate.c b/src/third_party/wiredtiger/src/os_posix/os_ftruncate.c deleted file mode 100644 index 94d6cba3bf5..00000000000 --- a/src/third_party/wiredtiger/src/os_posix/os_ftruncate.c +++ /dev/null @@ -1,26 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. 
- */ - -#include "wt_internal.h" - -/* - * __wt_ftruncate -- - * Truncate a file. - */ -int -__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) -{ - WT_DECL_RET; - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret); - if (ret == 0) - return (0); - - WT_RET_MSG(session, ret, "%s ftruncate error", fh->name); -} diff --git a/src/third_party/wiredtiger/src/os_posix/os_map.c b/src/third_party/wiredtiger/src/os_posix/os_map.c index 42aeeac4a5e..de28891ffd1 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_map.c +++ b/src/third_party/wiredtiger/src/os_posix/os_map.c @@ -9,58 +9,74 @@ #include "wt_internal.h" /* - * __wt_mmap -- + * __wt_posix_map -- * Map a file into memory. */ int -__wt_mmap(WT_SESSION_IMPL *session, +__wt_posix_map(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie) { + size_t len; + wt_off_t file_size; void *map; - size_t orig_size; WT_UNUSED(mappingcookie); + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + + /* + * Mapping isn't possible if direct I/O configured for the file, the + * Linux open(2) documentation says applications should avoid mixing + * mmap(2) of files with direct I/O to the same files. + */ + if (fh->direct_io) + return (ENOTSUP); + /* - * Record the current size and only map and set that as the length, it - * could change between the map call and when we set the return length. - * For the same reason we could actually map past the end of the file; - * we don't read bytes past the end of the file though, so as long as - * the map call succeeds, it's all OK. + * There's no locking here to prevent the underlying file from changing + * underneath us, our caller needs to ensure consistency of the mapped + * region vs. any other file activity. 
*/ - orig_size = (size_t)fh->size; - if ((map = mmap(NULL, orig_size, + WT_RET(__wt_filesize(session, fh, &file_size)); + len = (size_t)file_size; + + (void)__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: memory-map: %" WT_SIZET_FMT " bytes", fh->name, len); + + if ((map = mmap(NULL, len, PROT_READ, #ifdef MAP_NOCORE MAP_NOCORE | #endif MAP_PRIVATE, - fh->fd, (wt_off_t)0)) == MAP_FAILED) { - WT_RET_MSG(session, __wt_errno(), - "%s map error: failed to map %" WT_SIZET_FMT " bytes", - fh->name, orig_size); - } - (void)__wt_verbose(session, WT_VERB_FILEOPS, - "%s: map %p: %" WT_SIZET_FMT " bytes", fh->name, map, orig_size); + fh->fd, (wt_off_t)0)) == MAP_FAILED) + WT_RET_MSG(session, + __wt_errno(), "%s: memory-map: mmap", fh->name); *(void **)mapp = map; - *lenp = orig_size; + *lenp = len; return (0); } +#ifdef HAVE_POSIX_MADVISE /* - * __wt_mmap_preload -- + * __posix_map_preload_madvise -- * Cause a section of a memory map to be faulted in. */ -int -__wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size) +static int +__posix_map_preload_madvise( + WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size) { -#ifdef HAVE_POSIX_MADVISE - /* Linux requires the address be aligned to a 4KB boundary. */ - WT_CONNECTION_IMPL *conn = S2C(session); - WT_BM *bm = S2BT(session)->bm; + WT_BM *bm; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; - void *blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); + void *blk; + + conn = S2C(session); + bm = S2BT(session)->bm; + + /* Linux requires the address be aligned to a 4KB boundary. */ + blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); size += WT_PTRDIFF(p, blk); /* XXX proxy for "am I doing a scan?" 
-- manual read-ahead */ @@ -79,59 +95,99 @@ __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size) */ size &= ~(size_t)(conn->page_size - 1); - if (size > (size_t)conn->page_size && - (ret = posix_madvise(blk, size, POSIX_MADV_WILLNEED)) != 0) - WT_RET_MSG(session, ret, "posix_madvise will need"); + if (size <= (size_t)conn->page_size || + (ret = posix_madvise(blk, size, POSIX_MADV_WILLNEED)) == 0) + return (0); + WT_RET_MSG(session, ret, + "%s: memory-map preload: posix_madvise: POSIX_MADV_WILLNEED", + fh->name); +} +#endif + +/* + * __wt_posix_map_preload -- + * Cause a section of a memory map to be faulted in. + */ +int +__wt_posix_map_preload( + WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + +#ifdef HAVE_POSIX_MADVISE + return (__posix_map_preload_madvise(session, fh, p, size)); #else - WT_UNUSED(session); + WT_UNUSED(fh); WT_UNUSED(p); WT_UNUSED(size); + return (ENOTSUP); #endif - - return (0); } +#ifdef HAVE_POSIX_MADVISE /* - * __wt_mmap_discard -- + * __posix_map_discard_madvise -- * Discard a chunk of the memory map. */ -int -__wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size) +static int +__posix_map_discard_madvise( + WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) { -#ifdef HAVE_POSIX_MADVISE - /* Linux requires the address be aligned to a 4KB boundary. */ - WT_CONNECTION_IMPL *conn = S2C(session); + WT_CONNECTION_IMPL *conn; WT_DECL_RET; - void *blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); + void *blk; + + conn = S2C(session); + + /* Linux requires the address be aligned to a 4KB boundary. 
*/ + blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); size += WT_PTRDIFF(p, blk); - if ((ret = posix_madvise(blk, size, POSIX_MADV_DONTNEED)) != 0) - WT_RET_MSG(session, ret, "posix_madvise don't need"); + if ((ret = posix_madvise(blk, size, POSIX_MADV_DONTNEED)) == 0) + return (0); + WT_RET_MSG(session, ret, + "%s: memory-map discard: posix_madvise: POSIX_MADV_DONTNEED", + fh->name); +} +#endif + +/* + * __wt_posix_map_discard -- + * Discard a chunk of the memory map. + */ +int +__wt_posix_map_discard( + WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) +{ + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + +#ifdef HAVE_POSIX_MADVISE + return (__posix_map_discard_madvise(session, fh, p, size)); #else - WT_UNUSED(session); + WT_UNUSED(fh); WT_UNUSED(p); WT_UNUSED(size); + return (ENOTSUP); #endif - return (0); } /* - * __wt_munmap -- + * __wt_posix_map_unmap -- * Remove a memory mapping. */ int -__wt_munmap(WT_SESSION_IMPL *session, +__wt_posix_map_unmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, void **mappingcookie) { WT_UNUSED(mappingcookie); - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, - "%s: unmap %p: %" WT_SIZET_FMT " bytes", fh->name, map, len)); + WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_IN_MEMORY)); + + (void)__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len); if (munmap(map, len) == 0) return (0); - WT_RET_MSG(session, __wt_errno(), - "%s unmap error: failed to unmap %" WT_SIZET_FMT " bytes", - fh->name, len); + WT_RET_MSG(session, __wt_errno(), "%s: memory-unmap: munmap", fh->name); } diff --git a/src/third_party/wiredtiger/src/os_posix/os_open.c b/src/third_party/wiredtiger/src/os_posix/os_open.c deleted file mode 100644 index 219b26c2fa1..00000000000 --- a/src/third_party/wiredtiger/src/os_posix/os_open.c +++ /dev/null @@ -1,253 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. 
- * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __open_directory -- - * Open up a file handle to a directory. - */ -static int -__open_directory(WT_SESSION_IMPL *session, char *path, int *fd) -{ - WT_DECL_RET; - - WT_SYSCALL_RETRY(((*fd = - open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret); - if (ret != 0) - WT_RET_MSG(session, ret, "%s: open_directory", path); - return (ret); -} - -/* - * __wt_open -- - * Open a file handle. - */ -int -__wt_open(WT_SESSION_IMPL *session, - const char *name, bool ok_create, bool exclusive, int dio_type, WT_FH **fhp) -{ - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - WT_FH *fh, *tfh; - mode_t mode; - uint64_t bucket, hash; - int f, fd; - bool direct_io, matched; - char *path; - - conn = S2C(session); - direct_io = false; - fh = NULL; - fd = -1; - path = NULL; - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: open", name)); - - /* Increment the reference count if we already have the file open. */ - matched = false; - hash = __wt_hash_city64(name, strlen(name)); - bucket = hash % WT_HASH_ARRAY_SIZE; - __wt_spin_lock(session, &conn->fh_lock); - TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq) { - if (strcmp(name, tfh->name) == 0) { - ++tfh->ref; - *fhp = tfh; - matched = true; - break; - } - } - __wt_spin_unlock(session, &conn->fh_lock); - if (matched) - return (0); - - WT_RET(__wt_filename(session, name, &path)); - - if (dio_type == WT_FILE_TYPE_DIRECTORY) { - WT_ERR(__open_directory(session, path, &fd)); - goto setupfh; - } - - /* - * If this is a read-only connection, open all files read-only - * except the lock file. - */ - if (F_ISSET(conn, WT_CONN_READONLY) && - !WT_STRING_MATCH(name, WT_SINGLETHREAD, - strlen(WT_SINGLETHREAD))) - f = O_RDONLY; - else - f = O_RDWR; -#ifdef O_BINARY - /* Windows clones: we always want to treat the file as a binary. 
*/ - f |= O_BINARY; -#endif -#ifdef O_CLOEXEC - /* - * Security: - * The application may spawn a new process, and we don't want another - * process to have access to our file handles. - */ - f |= O_CLOEXEC; -#endif -#ifdef O_NOATIME - /* Avoid updating metadata for read-only workloads. */ - if (dio_type == WT_FILE_TYPE_DATA || - dio_type == WT_FILE_TYPE_CHECKPOINT) - f |= O_NOATIME; -#endif - - if (ok_create) { - WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) || - WT_STRING_MATCH(name, WT_SINGLETHREAD, - strlen(WT_SINGLETHREAD))); - f |= O_CREAT; - if (exclusive) - f |= O_EXCL; - mode = 0666; - } else - mode = 0; - -#ifdef O_DIRECT - if (dio_type && FLD_ISSET(conn->direct_io, dio_type)) { - f |= O_DIRECT; - direct_io = true; - } -#endif - if (dio_type == WT_FILE_TYPE_LOG && - FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) -#ifdef O_DSYNC - f |= O_DSYNC; -#elif defined(O_SYNC) - f |= O_SYNC; -#else - WT_ERR_MSG(session, ENOTSUP, - "Unsupported log sync mode requested"); -#endif - WT_SYSCALL_RETRY(((fd = open(path, f, mode)) == -1 ? 1 : 0), ret); - if (ret != 0) - WT_ERR_MSG(session, ret, - direct_io ? - "%s: open failed with direct I/O configured, some " - "filesystem types do not support direct I/O" : "%s", path); - -setupfh: -#if defined(HAVE_FCNTL) && defined(FD_CLOEXEC) && !defined(O_CLOEXEC) - /* - * Security: - * The application may spawn a new process, and we don't want another - * process to have access to our file handles. There's an obvious - * race here, so we prefer the flag to open if available. - */ - if ((f = fcntl(fd, F_GETFD)) == -1 || - fcntl(fd, F_SETFD, f | FD_CLOEXEC) == -1) - WT_ERR_MSG(session, __wt_errno(), "%s: fcntl", name); -#endif - -#if defined(HAVE_POSIX_FADVISE) - /* Disable read-ahead on trees: it slows down random read workloads. 
*/ - if (dio_type == WT_FILE_TYPE_DATA || - dio_type == WT_FILE_TYPE_CHECKPOINT) - WT_ERR(posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM)); -#endif - - WT_ERR(__wt_calloc_one(session, &fh)); - WT_ERR(__wt_strdup(session, name, &fh->name)); - fh->name_hash = hash; - fh->fd = fd; - fh->ref = 1; - fh->direct_io = direct_io; - - /* Set the file's size. */ - WT_ERR(__wt_filesize(session, fh, &fh->size)); - - /* Configure file extension. */ - if (dio_type == WT_FILE_TYPE_DATA || - dio_type == WT_FILE_TYPE_CHECKPOINT) - fh->extend_len = conn->data_extend_len; - - /* Configure fallocate/posix_fallocate calls. */ - __wt_fallocate_config(session, fh); - - /* - * Repeat the check for a match, but then link onto the database's list - * of files. - */ - matched = false; - __wt_spin_lock(session, &conn->fh_lock); - TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq) { - if (strcmp(name, tfh->name) == 0) { - ++tfh->ref; - *fhp = tfh; - matched = true; - break; - } - } - if (!matched) { - WT_CONN_FILE_INSERT(conn, fh, bucket); - (void)__wt_atomic_add32(&conn->open_file_count, 1); - *fhp = fh; - } - __wt_spin_unlock(session, &conn->fh_lock); - if (matched) { -err: if (fh != NULL) { - __wt_free(session, fh->name); - __wt_free(session, fh); - } - if (fd != -1) - (void)close(fd); - } - - __wt_free(session, path); - return (ret); -} - -/* - * __wt_close -- - * Close a file handle. - */ -int -__wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) -{ - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - WT_FH *fh; - uint64_t bucket; - - conn = S2C(session); - - if (*fhp == NULL) - return (0); - fh = *fhp; - *fhp = NULL; - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: close", fh->name)); - - __wt_spin_lock(session, &conn->fh_lock); - if (fh == NULL || fh->ref == 0 || --fh->ref > 0) { - __wt_spin_unlock(session, &conn->fh_lock); - return (0); - } - - /* Remove from the list. 
*/ - bucket = fh->name_hash % WT_HASH_ARRAY_SIZE; - WT_CONN_FILE_REMOVE(conn, fh, bucket); - (void)__wt_atomic_sub32(&conn->open_file_count, 1); - - __wt_spin_unlock(session, &conn->fh_lock); - - /* Discard the memory. */ - if (close(fh->fd) != 0) { - ret = __wt_errno(); - __wt_err(session, ret, "close: %s", fh->name); - } - - __wt_free(session, fh->name); - __wt_free(session, fh); - return (ret); -} diff --git a/src/third_party/wiredtiger/src/os_posix/os_remove.c b/src/third_party/wiredtiger/src/os_posix/os_remove.c deleted file mode 100644 index eb2e37fdc38..00000000000 --- a/src/third_party/wiredtiger/src/os_posix/os_remove.c +++ /dev/null @@ -1,69 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __remove_file_check -- - * Check if the file is currently open before removing it. - */ -static void -__remove_file_check(WT_SESSION_IMPL *session, const char *name) -{ -#ifdef HAVE_DIAGNOSTIC - WT_CONNECTION_IMPL *conn; - WT_FH *fh; - uint64_t bucket; - - conn = S2C(session); - WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY)); - fh = NULL; - bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE; - - /* - * Check if the file is open: it's an error if it is, since a higher - * level should have closed it before removing. - */ - __wt_spin_lock(session, &conn->fh_lock); - TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) - if (strcmp(name, fh->name) == 0) - break; - __wt_spin_unlock(session, &conn->fh_lock); - - WT_ASSERT(session, fh == NULL); -#else - WT_UNUSED(session); - WT_UNUSED(name); -#endif -} - -/* - * __wt_remove -- - * Remove a file. 
- */ -int -__wt_remove(WT_SESSION_IMPL *session, const char *name) -{ - WT_DECL_RET; - char *path; - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: remove", name)); - - __remove_file_check(session, name); - - WT_RET(__wt_filename(session, name, &path)); - - WT_SYSCALL_RETRY(remove(path), ret); - - __wt_free(session, path); - - if (ret == 0 || ret == ENOENT) - return (0); - - WT_RET_MSG(session, ret, "%s: remove", name); -} diff --git a/src/third_party/wiredtiger/src/os_posix/os_rename.c b/src/third_party/wiredtiger/src/os_posix/os_rename.c deleted file mode 100644 index 8ec4ee3aa23..00000000000 --- a/src/third_party/wiredtiger/src/os_posix/os_rename.c +++ /dev/null @@ -1,40 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_rename -- - * Rename a file. - */ -int -__wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to) -{ - WT_DECL_RET; - char *from_path, *to_path; - - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "rename %s to %s", from, to)); - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - from_path = to_path = NULL; - - WT_RET(__wt_filename(session, from, &from_path)); - WT_TRET(__wt_filename(session, to, &to_path)); - - if (ret == 0) - WT_SYSCALL_RETRY(rename(from_path, to_path), ret); - - __wt_free(session, from_path); - __wt_free(session, to_path); - - if (ret == 0) - return (0); - - WT_RET_MSG(session, ret, "rename %s to %s", from, to); -} diff --git a/src/third_party/wiredtiger/src/os_posix/os_rw.c b/src/third_party/wiredtiger/src/os_posix/os_rw.c deleted file mode 100644 index 3d49fa7e712..00000000000 --- a/src/third_party/wiredtiger/src/os_posix/os_rw.c +++ /dev/null @@ -1,90 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. 
- * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_read -- - * Read a chunk. - */ -int -__wt_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) -{ - size_t chunk; - ssize_t nr; - uint8_t *addr; - - WT_STAT_FAST_CONN_INCR(session, read_io); - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, - "%s: read %" WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, len, (uintmax_t)offset)); - - /* Assert direct I/O is aligned and a multiple of the alignment. */ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break reads larger than 1GB into 1GB chunks. */ - for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { - chunk = WT_MIN(len, WT_GIGABYTE); - if ((nr = pread(fh->fd, addr, chunk, offset)) <= 0) - WT_RET_MSG(session, nr == 0 ? WT_ERROR : __wt_errno(), - "%s read error: failed to read %" WT_SIZET_FMT - " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} - -/* - * __wt_write -- - * Write a chunk. - */ -int -__wt_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) -{ - size_t chunk; - ssize_t nw; - const uint8_t *addr; - - WT_STAT_FAST_CONN_INCR(session, write_io); - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, - "%s: write %" WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, len, (uintmax_t)offset)); - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) || - WT_STRING_MATCH(fh->name, WT_SINGLETHREAD, - strlen(WT_SINGLETHREAD))); - /* Assert direct I/O is aligned and a multiple of the alignment. 
*/ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break writes larger than 1GB into 1GB chunks. */ - for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { - chunk = WT_MIN(len, WT_GIGABYTE); - if ((nw = pwrite(fh->fd, addr, chunk, offset)) < 0) - WT_RET_MSG(session, __wt_errno(), - "%s write error: failed to write %" WT_SIZET_FMT - " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} diff --git a/src/third_party/wiredtiger/src/os_posix/os_setvbuf.c b/src/third_party/wiredtiger/src/os_posix/os_setvbuf.c new file mode 100644 index 00000000000..d6107115eb3 --- /dev/null +++ b/src/third_party/wiredtiger/src/os_posix/os_setvbuf.c @@ -0,0 +1,34 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_stream_set_line_buffer -- + * Set line buffering on a stream. + */ +void +__wt_stream_set_line_buffer(FILE *fp) +{ + /* + * This function exists because MSVC doesn't support buffer sizes of 0 + * to the setvbuf call. To avoid re-introducing the bug, we have helper + * functions and disallow calling setvbuf directly in WiredTiger code. + */ + (void)setvbuf(fp, NULL, _IOLBF, 1024); +} + +/* + * __wt_stream_set_no_buffer -- + * Turn off buffering on a stream. 
+ */ +void +__wt_stream_set_no_buffer(FILE *fp) +{ + (void)setvbuf(fp, NULL, _IONBF, 0); +} diff --git a/src/third_party/wiredtiger/src/os_posix/os_stdio.c b/src/third_party/wiredtiger/src/os_posix/os_stdio.c deleted file mode 100644 index 65a0f40a659..00000000000 --- a/src/third_party/wiredtiger/src/os_posix/os_stdio.c +++ /dev/null @@ -1,126 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_fopen -- - * Open a FILE handle. - */ -int -__wt_fopen(WT_SESSION_IMPL *session, - const char *name, WT_FHANDLE_MODE mode_flag, u_int flags, FILE **fpp) -{ - WT_DECL_RET; - const char *mode, *path; - char *pathbuf; - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: fopen", name)); - - pathbuf = NULL; - if (LF_ISSET(WT_FOPEN_FIXED)) - path = name; - else { - WT_RET(__wt_filename(session, name, &pathbuf)); - path = pathbuf; - } - - mode = NULL; - switch (mode_flag) { - case WT_FHANDLE_APPEND: - mode = WT_FOPEN_APPEND; - break; - case WT_FHANDLE_READ: - mode = WT_FOPEN_READ; - break; - case WT_FHANDLE_WRITE: - mode = WT_FOPEN_WRITE; - break; - } - *fpp = fopen(path, mode); - if (*fpp == NULL) - ret = __wt_errno(); - - __wt_free(session, pathbuf); - - if (ret == 0) - return (0); - WT_RET_MSG(session, ret, "%s: fopen", name); -} - -/* - * __wt_vfprintf -- - * Vfprintf for a FILE handle. - */ -int -__wt_vfprintf(FILE *fp, const char *fmt, va_list ap) -{ - return (vfprintf(fp, fmt, ap) < 0 ? __wt_errno() : 0); -} - -/* - * __wt_fprintf -- - * Fprintf for a FILE handle. - */ -int -__wt_fprintf(FILE *fp, const char *fmt, ...) - WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3))) -{ - WT_DECL_RET; - va_list ap; - - va_start(ap, fmt); - ret = __wt_vfprintf(fp, fmt, ap); - va_end(ap); - - return (ret); -} - -/* - * __wt_fflush -- - * Flush a FILE handle. 
- */ -int -__wt_fflush(FILE *fp) -{ - /* Flush the handle. */ - return (fflush(fp) == 0 ? 0 : __wt_errno()); -} - -/* - * __wt_fclose -- - * Close a FILE handle. - */ -int -__wt_fclose(FILE **fpp, WT_FHANDLE_MODE mode_flag) -{ - FILE *fp; - WT_DECL_RET; - - if (*fpp == NULL) - return (0); - - fp = *fpp; - *fpp = NULL; - - /* - * If the handle was opened for writing, flush the file to the backing - * OS buffers, then flush the OS buffers to the backing disk. - */ - if (mode_flag == WT_FHANDLE_APPEND || mode_flag == WT_FHANDLE_WRITE) { - ret = __wt_fflush(fp); - if (fsync(fileno(fp)) != 0) - WT_TRET(__wt_errno()); - } - - /* Close the handle. */ - if (fclose(fp) != 0) - WT_TRET(__wt_errno()); - - return (ret); -} diff --git a/src/third_party/wiredtiger/src/os_win/os_dir.c b/src/third_party/wiredtiger/src/os_win/os_dir.c index 00ec4f252e4..64eae60983c 100644 --- a/src/third_party/wiredtiger/src/os_win/os_dir.c +++ b/src/third_party/wiredtiger/src/os_win/os_dir.c @@ -9,13 +9,12 @@ #include "wt_internal.h" /* - * __wt_dirlist -- - * Get a list of files from a directory, optionally filtered by - * a given prefix. + * __wt_win_directory_list -- + * Get a list of files from a directory, MSVC version. 
*/ int -__wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, - uint32_t flags, char ***dirlist, u_int *countp) +__wt_win_directory_list(WT_SESSION_IMPL *session, const char *dir, + const char *prefix, uint32_t flags, char ***dirlist, u_int *countp) { HANDLE findhandle; WIN32_FIND_DATA finddata; @@ -29,72 +28,60 @@ __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix, *dirlist = NULL; *countp = 0; - findhandle = INVALID_HANDLE_VALUE; - count = 0; - WT_RET(__wt_filename(session, dir, &path)); pathlen = strlen(path); - if (path[pathlen - 1] == '\\') { + if (path[pathlen - 1] == '\\') path[pathlen - 1] = '\0'; - } - WT_ERR(__wt_scr_alloc(session, pathlen + 3, &pathbuf)); WT_ERR(__wt_buf_fmt(session, pathbuf, "%s\\*", path)); + findhandle = INVALID_HANDLE_VALUE; dirallocsz = 0; dirsz = 0; entries = NULL; - if (flags == 0) - LF_SET(WT_DIRLIST_INCLUDE); - - WT_ERR(__wt_verbose(session, WT_VERB_FILEOPS, - "wt_dirlist of %s %s prefix %s", - pathbuf->data, LF_ISSET(WT_DIRLIST_INCLUDE) ? "include" : "exclude", - prefix == NULL ? "all" : prefix)); findhandle = FindFirstFileA(pathbuf->data, &finddata); + if (findhandle == INVALID_HANDLE_VALUE) + WT_ERR_MSG(session, __wt_getlasterror(), + "%s: directory-list: FindFirstFile", pathbuf->data); - if (INVALID_HANDLE_VALUE == findhandle) - WT_ERR_MSG(session, __wt_errno(), "%s: FindFirstFile", - pathbuf->data); - else { - do { + count = 0; + do { + /* + * Skip . and .. + */ + if (strcmp(finddata.cFileName, ".") == 0 || + strcmp(finddata.cFileName, "..") == 0) + continue; + + /* The list of files is optionally filtered by a prefix. */ + match = false; + if (prefix != NULL && + ((LF_ISSET(WT_DIRLIST_INCLUDE) && + WT_PREFIX_MATCH(finddata.cFileName, prefix)) || + (LF_ISSET(WT_DIRLIST_EXCLUDE) && + !WT_PREFIX_MATCH(finddata.cFileName, prefix)))) + match = true; + if (prefix == NULL || match) { /* - * Skip . and .. + * We have a file name we want to return. 
*/ - if (strcmp(finddata.cFileName, ".") == 0 || - strcmp(finddata.cFileName, "..") == 0) - continue; - match = false; - if (prefix != NULL && - ((LF_ISSET(WT_DIRLIST_INCLUDE) && - WT_PREFIX_MATCH(finddata.cFileName, prefix)) || - (LF_ISSET(WT_DIRLIST_EXCLUDE) && - !WT_PREFIX_MATCH(finddata.cFileName, prefix)))) - match = true; - if (prefix == NULL || match) { - /* - * We have a file name we want to return. - */ - count++; - if (count > dirsz) { - dirsz += WT_DIR_ENTRY; - WT_ERR(__wt_realloc_def(session, - &dirallocsz, dirsz, &entries)); - } - WT_ERR(__wt_strdup(session, - finddata.cFileName, &entries[count - 1])); + count++; + if (count > dirsz) { + dirsz += WT_DIR_ENTRY; + WT_ERR(__wt_realloc_def(session, + &dirallocsz, dirsz, &entries)); } - } while (FindNextFileA(findhandle, &finddata) != 0); - } - + WT_ERR(__wt_strdup(session, + finddata.cFileName, &entries[count - 1])); + } + } while (FindNextFileA(findhandle, &finddata) != 0); if (count > 0) *dirlist = entries; *countp = count; -err: - if (findhandle != INVALID_HANDLE_VALUE) +err: if (findhandle != INVALID_HANDLE_VALUE) (void)FindClose(findhandle); __wt_free(session, path); __wt_scr_free(session, &pathbuf); @@ -108,5 +95,7 @@ err: __wt_free(session, entries); } - WT_RET_MSG(session, ret, "dirlist %s prefix %s", dir, prefix); + WT_RET_MSG(session, ret, + "%s: directory-list, prefix \"%s\"", + dir, prefix == NULL ? 
"" : prefix); } diff --git a/src/third_party/wiredtiger/src/os_win/os_dlopen.c b/src/third_party/wiredtiger/src/os_win/os_dlopen.c index 0bad39d681d..ce949e4ea5f 100644 --- a/src/third_party/wiredtiger/src/os_win/os_dlopen.c +++ b/src/third_party/wiredtiger/src/os_win/os_dlopen.c @@ -23,18 +23,17 @@ __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) /* NULL means load from the current binary */ if (path == NULL) { - ret = GetModuleHandleExA(0, NULL, (HMODULE *)&dlh->handle); - if (ret == FALSE) - WT_ERR_MSG(session, - __wt_errno(), "GetModuleHandleEx(%s): %s", path, 0); + if (GetModuleHandleExA( + 0, NULL, (HMODULE *)&dlh->handle) == FALSE) { + ret = __wt_getlasterror(); + WT_ERR_MSG(session, ret, + "GetModuleHandleEx(%s): %s", path, 0); + } } else { // TODO: load dll here DebugBreak(); } - /* Windows returns 0 on failure, WT expects 0 on success */ - ret = !ret; - *dlhp = dlh; if (0) { err: __wt_free(session, dlh->name); @@ -56,10 +55,9 @@ __wt_dlsym(WT_SESSION_IMPL *session, *(void **)sym_ret = NULL; sym = GetProcAddress(dlh->handle, name); - if (sym == NULL && fail) { - WT_RET_MSG(session, __wt_errno(), + if (sym == NULL && fail) + WT_RET_MSG(session, __wt_getlasterror(), "GetProcAddress(%s in %s)", name, dlh->name); - } *(void **)sym_ret = sym; return (0); @@ -74,13 +72,11 @@ __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) { WT_DECL_RET; - if ((ret = FreeLibrary(dlh->handle)) == FALSE) { - __wt_err(session, __wt_errno(), "FreeLibrary"); + if (FreeLibrary(dlh->handle) == FALSE) { + ret = __wt_getlasterror(); + __wt_err(session, ret, "FreeLibrary: %s", dlh->name); } - /* Windows returns 0 on failure, WT expects 0 on success */ - ret = !ret; - __wt_free(session, dlh->name); __wt_free(session, dlh); return (ret); diff --git a/src/third_party/wiredtiger/src/os_win/os_errno.c b/src/third_party/wiredtiger/src/os_win/os_errno.c index 590fcdc9d44..f3fffd5ef42 100644 --- a/src/third_party/wiredtiger/src/os_win/os_errno.c +++ 
b/src/third_party/wiredtiger/src/os_win/os_errno.c @@ -46,13 +46,13 @@ __wt_map_windows_error_to_error(DWORD winerr) * of failures. */ int -__wt_map_error_rdonly(int winerr) +__wt_map_error_rdonly(int error) { - if (winerr == ERROR_FILE_NOT_FOUND) + if (error == ERROR_FILE_NOT_FOUND) return (WT_NOTFOUND); - else if (winerr == ERROR_ACCESS_DENIED) + else if (error == ERROR_ACCESS_DENIED) return (WT_PERM_DENIED); - return (winerr); + return (error); } /* @@ -63,14 +63,33 @@ int __wt_errno(void) { /* + * Check for 0: + * It's easy to introduce a problem by calling the wrong error function, + * for example, this function when the MSVC function set the C runtime + * error value. Handle gracefully and always return an error. + */ + return (errno == 0 ? WT_ERROR : errno); +} + +/* + * __wt_getlasterror -- + * Return GetLastError, or WT_ERROR if error not set. + */ +int +__wt_getlasterror(void) +{ + /* * Called when we know an error occurred, and we want the system - * error code, but there's some chance it's not set. + * error code. */ DWORD err = GetLastError(); - /* GetLastError should only be called if we hit an actual error */ - WT_ASSERT(NULL, err != ERROR_SUCCESS); - + /* + * Check for ERROR_SUCCESS: + * It's easy to introduce a problem by calling the wrong error function, + * for example, this function when the MSVC function set the C runtime + * error value. Handle gracefully and always return an error. + */ return (err == ERROR_SUCCESS ? WT_ERROR : __wt_map_windows_error_to_error(err)); } diff --git a/src/third_party/wiredtiger/src/os_win/os_exist.c b/src/third_party/wiredtiger/src/os_win/os_exist.c deleted file mode 100644 index ec1369cc727..00000000000 --- a/src/third_party/wiredtiger/src/os_win/os_exist.c +++ /dev/null @@ -1,33 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. 
- */ - -#include "wt_internal.h" - -/* - * __wt_exist -- - * Return if the file exists. - */ -int -__wt_exist(WT_SESSION_IMPL *session, const char *filename, bool *existp) -{ - WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, filename, &path)); - - ret = GetFileAttributesA(path); - - __wt_free(session, path); - - if (ret != INVALID_FILE_ATTRIBUTES) - *existp = true; - else - *existp = false; - - return (0); -} diff --git a/src/third_party/wiredtiger/src/os_win/os_fallocate.c b/src/third_party/wiredtiger/src/os_win/os_fallocate.c deleted file mode 100644 index a324687ca73..00000000000 --- a/src/third_party/wiredtiger/src/os_win/os_fallocate.c +++ /dev/null @@ -1,45 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_fallocate_config -- - * Configure fallocate behavior for a file handle. - */ -void -__wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_UNUSED(session); - - /* - * fallocate on Windows would be implemented using SetEndOfFile, which - * can also truncate the file. WiredTiger expects fallocate to ignore - * requests to truncate the file which Windows does not do, so we don't - * support the call. - */ - fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE; - fh->fallocate_requires_locking = false; -} - -/* - * __wt_fallocate -- - * Allocate space for a file handle. 
- */ -int -__wt_fallocate( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - WT_UNUSED(session); - WT_UNUSED(fh); - WT_UNUSED(offset); - WT_UNUSED(len); - - return (ENOTSUP); -} diff --git a/src/third_party/wiredtiger/src/os_win/os_filesize.c b/src/third_party/wiredtiger/src/os_win/os_filesize.c deleted file mode 100644 index c9925fb18a8..00000000000 --- a/src/third_party/wiredtiger/src/os_win/os_filesize.c +++ /dev/null @@ -1,64 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_filesize -- - * Get the size of a file in bytes. - */ -int -__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) -{ - LARGE_INTEGER size; - WT_DECL_RET; - - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "%s: GetFileSizeEx", fh->name)); - - if ((ret = GetFileSizeEx(fh->filehandle, &size)) != 0) { - *sizep = size.QuadPart; - return (0); - } - - WT_RET_MSG(session, __wt_errno(), "%s: GetFileSizeEx", fh->name); -} - -/* - * __wt_filesize_name -- - * Return the size of a file in bytes, given a file name. - */ -int -__wt_filesize_name(WT_SESSION_IMPL *session, - const char *filename, bool silent, wt_off_t *sizep) -{ - WIN32_FILE_ATTRIBUTE_DATA data; - WT_DECL_RET; - char *path; - - WT_RET(__wt_filename(session, filename, &path)); - - ret = GetFileAttributesExA(path, GetFileExInfoStandard, &data); - - __wt_free(session, path); - - if (ret != 0) { - *sizep = - ((int64_t)data.nFileSizeHigh << 32) | data.nFileSizeLow; - return (0); - } - - /* - * Some callers of this function expect failure if the file doesn't - * exist, and don't want an error message logged. 
- */ - ret = __wt_errno(); - if (!silent) - WT_RET_MSG(session, ret, "%s: GetFileAttributesEx", filename); - return (ret); -} diff --git a/src/third_party/wiredtiger/src/os_win/os_flock.c b/src/third_party/wiredtiger/src/os_win/os_flock.c deleted file mode 100644 index 60a981499a5..00000000000 --- a/src/third_party/wiredtiger/src/os_win/os_flock.c +++ /dev/null @@ -1,47 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_bytelock -- - * Lock/unlock a byte in a file. - */ -int -__wt_bytelock(WT_FH *fhp, wt_off_t byte, bool lock) -{ - WT_DECL_RET; - - /* - * WiredTiger requires this function be able to acquire locks past - * the end of file. - * - * Note we're using fcntl(2) locking: all fcntl locks associated with a - * file for a given process are removed when any file descriptor for the - * file is closed by the process, even if a lock was never requested for - * that file descriptor. - * - * http://msdn.microsoft.com/ - * en-us/library/windows/desktop/aa365202%28v=vs.85%29.aspx - * - * You can lock bytes that are beyond the end of the current file. - * This is useful to coordinate adding records to the end of a file. - */ - if (lock) { - ret = LockFile(fhp->filehandle, UINT32_MAX & byte, - UINT32_MAX & (byte >> 32), 1, 0); - } else { - ret = UnlockFile(fhp->filehandle, UINT32_MAX & byte, - UINT32_MAX & (byte >> 32), 1, 0); - } - - if (ret == FALSE) - WT_RET_MSG(NULL, __wt_errno(), "%s: LockFile", fhp->name); - - return (0); -} diff --git a/src/third_party/wiredtiger/src/os_win/os_fs.c b/src/third_party/wiredtiger/src/os_win/os_fs.c new file mode 100644 index 00000000000..462773cb9fb --- /dev/null +++ b/src/third_party/wiredtiger/src/os_win/os_fs.c @@ -0,0 +1,707 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. 
+ * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __win_directory_sync -- + * Flush a directory to ensure a file creation is durable. + */ +static int +__win_directory_sync(WT_SESSION_IMPL *session, const char *path) +{ + WT_UNUSED(session); + WT_UNUSED(path); + return (0); +} + +/* + * __win_file_exist -- + * Return if the file exists. + */ +static int +__win_file_exist(WT_SESSION_IMPL *session, const char *name, bool *existp) +{ + WT_DECL_RET; + char *path; + + WT_RET(__wt_filename(session, name, &path)); + + ret = GetFileAttributesA(path); + + __wt_free(session, path); + + if (ret != INVALID_FILE_ATTRIBUTES) + *existp = true; + else + *existp = false; + + return (0); +} + +/* + * __win_file_remove -- + * Remove a file. + */ +static int +__win_file_remove(WT_SESSION_IMPL *session, const char *name) +{ + WT_DECL_RET; + char *path; + +#ifdef HAVE_DIAGNOSTIC + if (__wt_handle_search(session, name, false, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-remove: file has open handles", name); +#endif + + WT_RET(__wt_filename(session, name, &path)); + name = path; + + if (DeleteFileA(name) == FALSE) { + ret = __wt_getlasterror(); + __wt_err(session, ret, "%s: file-remove: DeleteFileA", name); + } + + __wt_free(session, path); + return (ret); +} + +/* + * __win_file_rename -- + * Rename a file. 
+ */ +static int +__win_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) +{ + WT_DECL_RET; + char *from_path, *to_path; + +#ifdef HAVE_DIAGNOSTIC + if (__wt_handle_search(session, from, false, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", from); + if (__wt_handle_search(session, to, false, NULL, NULL)) + WT_RET_MSG(session, EINVAL, + "%s: file-rename: file has open handles", to); +#endif + + from_path = to_path = NULL; + WT_ERR(__wt_filename(session, from, &from_path)); + from = from_path; + WT_ERR(__wt_filename(session, to, &to_path)); + to = to_path; + + /* + * Check if file exists since Windows does not override the file if + * it exists. + */ + if (GetFileAttributesA(to) != INVALID_FILE_ATTRIBUTES) + if (DeleteFileA(to) == FALSE) { + ret = __wt_getlasterror(); + __wt_err(session, ret, + "%s to %s: file-rename: rename", from, to); + } + + if (ret == 0 && MoveFileA(from, to) == FALSE) { + ret = __wt_getlasterror(); + __wt_err(session, ret, + "%s to %s: file-rename: rename", from, to); + } + +err: __wt_free(session, from_path); + __wt_free(session, to_path); + return (ret); +} + +/* + * __win_file_size -- + * Get the size of a file in bytes, by file name. + */ +static int +__win_file_size( + WT_SESSION_IMPL *session, const char *name, bool silent, wt_off_t *sizep) +{ + WIN32_FILE_ATTRIBUTE_DATA data; + WT_DECL_RET; + char *path; + + WT_RET(__wt_filename(session, name, &path)); + + ret = GetFileAttributesExA(path, GetFileExInfoStandard, &data); + + __wt_free(session, path); + + if (ret != 0) { + *sizep = + ((int64_t)data.nFileSizeHigh << 32) | data.nFileSizeLow; + return (0); + } + + /* + * Some callers of this function expect failure if the file doesn't + * exist, and don't want an error message logged. + */ + ret = __wt_getlasterror(); + if (!silent) + WT_RET_MSG(session, ret, + "%s: file-size: GetFileAttributesEx", name); + return (ret); +} + +/* + * __win_handle_advise -- + * MSVC fadvise. 
+ */ +static int +__win_handle_advise(WT_SESSION_IMPL *session, + WT_FH *fh, wt_off_t offset, wt_off_t len, int advice) +{ + WT_UNUSED(session); + WT_UNUSED(fh); + WT_UNUSED(offset); + WT_UNUSED(len); + WT_UNUSED(advice); + + /* Quietly fail, callers expect not-supported failures. */ + return (ENOTSUP); +} + +/* + * __win_handle_allocate_configure -- + * Configure fallocate behavior for a file handle. + */ +static void +__win_handle_allocate_configure(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_UNUSED(session); + + /* + * fallocate on Windows would be implemented using SetEndOfFile, which + * can also truncate the file. WiredTiger expects fallocate to ignore + * requests to truncate the file which Windows does not do, so we don't + * support the call. + */ + fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE; + fh->fallocate_requires_locking = false; +} + +/* + * __win_handle_allocate -- + * Allocate space for a file handle. + */ +static int +__win_handle_allocate( + WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len) +{ + WT_UNUSED(session); + WT_UNUSED(fh); + WT_UNUSED(offset); + WT_UNUSED(len); + + WT_RET_MSG(session, ENOTSUP, "%s: handle-allocate", fh->name); + return (ENOTSUP); +} + +/* + * __win_handle_close -- + * Close a file handle. + */ +static int +__win_handle_close(WT_SESSION_IMPL *session, WT_FH *fh) +{ + WT_DECL_RET; + + if (fh->fp == NULL) { + /* + * We don't open Windows system handles when opening directories + * for flushing, as it is not necessary (or possible) to flush + * a directory on Windows. Confirm the file handle is set before + * attempting to close it. + */ + if (fh->filehandle != INVALID_HANDLE_VALUE && + CloseHandle(fh->filehandle) == 0) { + ret = __wt_getlasterror(); + __wt_err(session, ret, + "%s: handle-close: CloseHandle", fh->name); + } + } else { + /* If the stream was opened for writing, flush the file. 
*/
+		if (F_ISSET(fh, WT_FH_FLUSH_ON_CLOSE) && fflush(fh->fp) != 0) {
+			ret = __wt_errno();
+			__wt_err(session,
+			    ret, "%s: handle-close: fflush", fh->name);
+		}
+
+		/* Close the file, closing all the underlying handles. */
+		if (fclose(fh->fp) != 0) {
+			ret = __wt_errno();
+			__wt_err(session,
+			    ret, "%s: handle-close: fclose", fh->name);
+		}
+	}
+
+	/*
+	 * Close the secondary handle. Failures are logged and remembered in
+	 * ret, but never stop the remaining close steps from running.
+	 */
+	if (fh->filehandle_secondary != INVALID_HANDLE_VALUE &&
+	    CloseHandle(fh->filehandle_secondary) == 0) {
+		ret = __wt_getlasterror();
+		__wt_err(session, ret,
+		    "%s: handle-close: secondary: CloseHandle", fh->name);
+	}
+	return (ret);
+}
+
+/*
+ * __win_handle_getc --
+ *	ANSI C fgetc.
+ *
+ *	Stores the result of fgetc in *chp. Returns ENOTSUP if the handle has
+ *	no stdio stream; returns 0 for a character or a clean end-of-file (EOF
+ *	is then stored in *chp); returns an error only when fgetc returned EOF
+ *	and the stream's error indicator is set.
+ */
+static int
+__win_handle_getc(WT_SESSION_IMPL *session, WT_FH *fh, int *chp)
+{
+	if (fh->fp == NULL)
+		WT_RET_MSG(session,
+		    ENOTSUP, "%s: handle-getc: no stream configured", fh->name);
+
+	*chp = fgetc(fh->fp);
+	if (*chp != EOF || !ferror(fh->fp))
+		return (0);
+	WT_RET_MSG(session, __wt_errno(), "%s: handle-getc: fgetc", fh->name);
+}
+
+/*
+ * __win_handle_lock --
+ *	Lock/unlock a file.
+ */
+static int
+__win_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
+{
+	WT_DECL_RET;
+
+	/*
+	 * WiredTiger requires this function be able to acquire locks past
+	 * the end of file.
+	 *
+	 * This is the Windows implementation: it does not use fcntl(2)
+	 * locking, it calls the Win32 LockFile/UnlockFile functions on a
+	 * one-byte range at offset zero of the file's primary handle.
+	 *
+	 * http://msdn.microsoft.com/
+	 *    en-us/library/windows/desktop/aa365202%28v=vs.85%29.aspx
+	 *
+	 * You can lock bytes that are beyond the end of the current file.
+	 * This is useful to coordinate adding records to the end of a file.
+	 */
+	if (lock) {
+		/* Arguments: offset low/high = 0, length low/high = 1/0. */
+		if (LockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) {
+			ret = __wt_getlasterror();
+			__wt_err(session, ret,
+			    "%s: handle-lock: LockFile", fh->name);
+		}
+	} else
+		if (UnlockFile(fh->filehandle, 0, 0, 1, 0) == FALSE) {
+			ret = __wt_getlasterror();
+			__wt_err(session, ret,
+			    "%s: handle-lock: UnlockFile", fh->name);
+		}
+	return (ret);
+}
+
+/*
+ * __win_handle_printf --
+ *	ANSI C vfprintf.
+ *
+ *	Returns ENOTSUP if the handle has no stdio stream, 0 if vfprintf
+ *	reports success (a non-negative result), and EIO otherwise.
+ */
+static int
+__win_handle_printf(
+    WT_SESSION_IMPL *session, WT_FH *fh, const char *fmt, va_list ap)
+{
+	if (fh->fp == NULL)
+		WT_RET_MSG(session, ENOTSUP,
+		    "%s: vfprintf: no stream configured", fh->name);
+
+	if (vfprintf(fh->fp, fmt, ap) >= 0)
+		return (0);
+	WT_RET_MSG(session, EIO, "%s: handle-printf: vfprintf", fh->name);
+}
+
+/*
+ * __win_handle_read --
+ *	Read a chunk.
+ *
+ *	Reads len bytes starting at file offset "offset" into buf, issuing one
+ *	ReadFile call per chunk of at most WT_GIGABYTE bytes. Returns 0 once
+ *	len reaches zero; on a ReadFile failure returns WT_ERROR if that call
+ *	transferred no bytes, otherwise the value of __wt_getlasterror().
+ */
+static int
+__win_handle_read(
+    WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf)
+{
+	DWORD chunk, nr;
+	uint8_t *addr;
+	OVERLAPPED overlapped = { 0 };
+
+	nr = 0;
+
+	/* Assert direct I/O is aligned and a multiple of the alignment. */
+	WT_ASSERT(session,
+	    !fh->direct_io ||
+	    S2C(session)->buffer_alignment == 0 ||
+	    (!((uintptr_t)buf &
+	    (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
+	    len >= S2C(session)->buffer_alignment &&
+	    len % S2C(session)->buffer_alignment == 0));
+
+	/* Break reads larger than 1GB into 1GB chunks. */
+	for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) {
+		chunk = (DWORD)WT_MIN(len, WT_GIGABYTE);
+		/* The read position is passed explicitly, split in two halves. */
+		overlapped.Offset = UINT32_MAX & offset;
+		overlapped.OffsetHigh = UINT32_MAX & (offset >> 32);
+
+		if (!ReadFile(fh->filehandle, addr, chunk, &nr, &overlapped))
+			WT_RET_MSG(session,
+			    nr == 0 ? WT_ERROR : __wt_getlasterror(),
+			    "%s: handle-read: ReadFile: failed to read %lu "
+			    "bytes at offset %" PRIuMAX,
+			    fh->name, chunk, (uintmax_t)offset);
+	}
+	return (0);
+}
+
+/*
+ * __win_handle_size --
+ *	Get the size of a file in bytes, by file handle.
+ */
+static int
+__win_handle_size(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
+{
+	LARGE_INTEGER size;
+
+	/* A non-zero result means success: store the size and return 0. */
+	if (GetFileSizeEx(fh->filehandle, &size) != 0) {
+		*sizep = size.QuadPart;
+		return (0);
+	}
+
+	WT_RET_MSG(session,
+	    __wt_getlasterror(), "%s: handle-size: GetFileSizeEx", fh->name);
+}
+
+/*
+ * __win_handle_sync --
+ *	MSVC fflush/fsync.
+ *
+ *	Stream handles (fh->fp set) are flushed with fflush. Other handles are
+ *	flushed with FlushFileBuffers, but only when block is true; a
+ *	non-blocking request returns ENOTSUP. A handle with neither a stream
+ *	nor a valid system handle is a no-op returning 0.
+ */
+static int
+__win_handle_sync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
+{
+	WT_DECL_RET;
+
+	WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
+
+	/*
+	 * We don't open Windows system handles when opening directories
+	 * for flushing, as it is not necessary (or possible) to flush
+	 * a directory on Windows. Confirm the file handle is set before
+	 * attempting to sync it.
+	 */
+	if (fh->fp == NULL && fh->filehandle == INVALID_HANDLE_VALUE)
+		return (0);
+
+	if (fh->fp == NULL) {
+		/*
+		 * Callers attempting asynchronous flush handle ENOTSUP returns,
+		 * and won't make further attempts.
+		 */
+		if (!block)
+			return (ENOTSUP);
+
+		/* ret only holds the Win32 BOOL result here, not an errno. */
+		if ((ret = FlushFileBuffers(fh->filehandle)) == FALSE)
+			WT_RET_MSG(session, __wt_getlasterror(),
+			    "%s handle-sync: FlushFileBuffers error", fh->name);
+		return (0);
+	}
+
+	if (fflush(fh->fp) == 0)
+		return (0);
+	WT_RET_MSG(session, __wt_errno(), "%s: handle-sync: fflush", fh->name);
+}
+
+/*
+ * __win_handle_truncate --
+ *	Truncate a file.
+ */
+static int
+__win_handle_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
+{
+	LARGE_INTEGER largeint;
+
+	largeint.QuadPart = len;
+
+	/* Truncation uses the secondary handle, which may not be open. */
+	if (fh->filehandle_secondary == INVALID_HANDLE_VALUE)
+		WT_RET_MSG(session, EINVAL,
+		    "%s: handle-truncate: read-only", fh->name);
+
+	/* Move the secondary handle's file pointer to the new length... */
+	if (SetFilePointerEx(
+	    fh->filehandle_secondary, largeint, NULL, FILE_BEGIN) == FALSE)
+		WT_RET_MSG(session, __wt_getlasterror(),
+		    "%s: handle-truncate: SetFilePointerEx", fh->name);
+
+	/* ...and make that position the end of the file. */
+	if (SetEndOfFile(fh->filehandle_secondary) == FALSE) {
+		/* A mapped file is reported as EBUSY, without a message. */
+		if (GetLastError() == ERROR_USER_MAPPED_FILE)
+			return (EBUSY);
+		WT_RET_MSG(session, __wt_getlasterror(),
+		    "%s: handle-truncate: SetEndOfFile error", fh->name);
+	}
+	return (0);
+}
+
+/*
+ * __win_handle_write --
+ *	Write a chunk.
+ *
+ *	Writes len bytes from buf starting at file offset "offset", issuing
+ *	one WriteFile call per chunk of at most WT_GIGABYTE bytes. Returns 0
+ *	once len reaches zero, or the value of __wt_getlasterror() if a
+ *	WriteFile call fails.
+ */
+static int
+__win_handle_write(WT_SESSION_IMPL *session,
+    WT_FH *fh, wt_off_t offset, size_t len, const void *buf)
+{
+	DWORD chunk;
+	DWORD nw;
+	const uint8_t *addr;
+	OVERLAPPED overlapped = { 0 };
+
+	nw = 0;
+
+	/* Assert direct I/O is aligned and a multiple of the alignment. */
+	WT_ASSERT(session,
+	    !fh->direct_io ||
+	    S2C(session)->buffer_alignment == 0 ||
+	    (!((uintptr_t)buf &
+	    (uintptr_t)(S2C(session)->buffer_alignment - 1)) &&
+	    len >= S2C(session)->buffer_alignment &&
+	    len % S2C(session)->buffer_alignment == 0));
+
+	/* Break writes larger than 1GB into 1GB chunks. */
+	for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) {
+		chunk = (DWORD)WT_MIN(len, WT_GIGABYTE);
+		/* The write position is passed explicitly, split in two halves. */
+		overlapped.Offset = UINT32_MAX & offset;
+		overlapped.OffsetHigh = UINT32_MAX & (offset >> 32);
+
+		if (!WriteFile(fh->filehandle, addr, chunk, &nw, &overlapped))
+			WT_RET_MSG(session, __wt_getlasterror(),
+			    "%s: handle-write: WriteFile: failed to write %lu "
+			    "bytes at offset %" PRIuMAX,
+			    fh->name, chunk, (uintmax_t)offset);
+	}
+	return (0);
+}
+
+/*
+ * __win_handle_open --
+ *	Open a file handle.
+ */
+static int
+__win_handle_open(WT_SESSION_IMPL *session,
+    WT_FH *fh, const char *name, uint32_t file_type, uint32_t flags)
+{
+	DWORD dwCreationDisposition;
+	HANDLE filehandle, filehandle_secondary;
+	WT_CONNECTION_IMPL *conn;
+	WT_DECL_RET;
+	int desired_access, f, fd;
+	bool direct_io;
+	const char *stream_mode;
+
+	conn = S2C(session);
+	direct_io = false;
+
+	/* Set up error handling. */
+	fh->filehandle = fh->filehandle_secondary =
+	    filehandle = filehandle_secondary = INVALID_HANDLE_VALUE;
+	fh->fp = NULL;
+
+	/*
+	 * Opening a file handle on a directory is only to support filesystems
+	 * that require a directory sync for durability, and Windows doesn't
+	 * require that functionality: create an empty WT_FH structure with
+	 * invalid handles.
+	 */
+	if (file_type == WT_FILE_TYPE_DIRECTORY)
+		goto directory_open;
+
+	desired_access = GENERIC_READ;
+	if (!LF_ISSET(WT_OPEN_READONLY))
+		desired_access |= GENERIC_WRITE;
+
+	/*
+	 * Security:
+	 * The application may spawn a new process, and we don't want another
+	 * process to have access to our file handles.
+	 *
+	 * TODO: Set tighter file permissions but set bInheritHandle to false
+	 * to prevent inheritance
+	 */
+	f = FILE_ATTRIBUTE_NORMAL;
+
+	dwCreationDisposition = 0;
+	if (LF_ISSET(WT_OPEN_CREATE)) {
+		dwCreationDisposition = CREATE_NEW;
+		if (LF_ISSET(WT_OPEN_EXCLUSIVE))
+			dwCreationDisposition = CREATE_ALWAYS;
+	} else
+		dwCreationDisposition = OPEN_EXISTING;
+
+	/*
+	 * direct_io means no OS file caching. This requires aligned buffer
+	 * allocations like O_DIRECT.
+	 */
+	if (FLD_ISSET(conn->direct_io, file_type) ||
+	    (LF_ISSET(WT_OPEN_READONLY) &&
+	    file_type == WT_FILE_TYPE_DATA &&
+	    FLD_ISSET(conn->direct_io, WT_FILE_TYPE_CHECKPOINT))) {
+		f |= FILE_FLAG_NO_BUFFERING;
+		direct_io = true;
+	}
+	fh->direct_io = direct_io;
+
+	/* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */
+	if (FLD_ISSET(conn->write_through, file_type))
+		f |= FILE_FLAG_WRITE_THROUGH;
+
+	if (file_type == WT_FILE_TYPE_LOG &&
+	    FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC))
+		f |= FILE_FLAG_WRITE_THROUGH;
+
+	/* Disable read-ahead on trees: it slows down random read workloads. */
+	if (file_type == WT_FILE_TYPE_DATA)
+		f |= FILE_FLAG_RANDOM_ACCESS;
+
+	filehandle = CreateFileA(name, desired_access,
+	    FILE_SHARE_READ | FILE_SHARE_WRITE,
+	    NULL, dwCreationDisposition, f, NULL);
+	if (filehandle == INVALID_HANDLE_VALUE) {
+		/* A create that found an existing file retries as a plain open. */
+		if (LF_ISSET(WT_OPEN_CREATE) &&
+		    GetLastError() == ERROR_FILE_EXISTS)
+			filehandle = CreateFileA(name, desired_access,
+			    FILE_SHARE_READ | FILE_SHARE_WRITE,
+			    NULL, OPEN_EXISTING, f, NULL);
+		if (filehandle == INVALID_HANDLE_VALUE)
+			WT_ERR_MSG(session, __wt_getlasterror(),
+			    direct_io ?
+			    "%s: handle-open: CreateFileA: failed with direct "
+			    "I/O configured, some filesystem types do not "
+			    "support direct I/O" :
+			    "%s: handle-open: CreateFileA", name);
+	}
+
+	/*
+	 * Open a second handle to file to support allocation/truncation
+	 * concurrently with reads on the file. Writes would also move the file
+	 * pointer.
+	 */
+	if (!LF_ISSET(WT_OPEN_READONLY)) {
+		filehandle_secondary = CreateFileA(name, desired_access,
+		    FILE_SHARE_READ | FILE_SHARE_WRITE,
+		    NULL, OPEN_EXISTING, f, NULL);
+		if (filehandle_secondary == INVALID_HANDLE_VALUE)
+			WT_ERR_MSG(session, __wt_getlasterror(),
+			    "%s: handle-open: CreateFileA: secondary", name);
+	}
+
+	/* Optionally configure a stdio stream API. */
+	switch (LF_MASK(WT_STREAM_APPEND | WT_STREAM_READ | WT_STREAM_WRITE)) {
+	case WT_STREAM_APPEND:
+		f = _O_APPEND | _O_TEXT;
+		stream_mode = "a";
+		F_SET(fh, WT_FH_FLUSH_ON_CLOSE);
+		break;
+	case WT_STREAM_READ:
+		f = _O_RDONLY | _O_TEXT;
+		stream_mode = "r";
+		break;
+	case WT_STREAM_WRITE:
+		f = _O_TEXT;
+		stream_mode = "w";
+		F_SET(fh, WT_FH_FLUSH_ON_CLOSE);
+		break;
+	case 0:
+	default:
+		stream_mode = NULL;
+		break;
+	}
+	if (stream_mode != NULL) {
+		if ((fd = _open_osfhandle((intptr_t)filehandle, f)) == -1)
+			WT_ERR_MSG(session, __wt_errno(),
+			    "%s: handle-open: _open_osfhandle", name);
+		if ((fh->fp = fdopen(fd, stream_mode)) == NULL) {
+			/*
+			 * Once _open_osfhandle succeeds the descriptor owns
+			 * the system handle: release both through the
+			 * descriptor, and mark the handle invalid so the
+			 * error path doesn't close it a second time. Save the
+			 * error first, _close may change errno.
+			 */
+			ret = __wt_errno();
+			(void)_close(fd);
+			filehandle = INVALID_HANDLE_VALUE;
+			WT_ERR_MSG(session, ret,
+			    "%s: handle-open: fdopen", name);
+		}
+		if (LF_ISSET(WT_STREAM_LINE_BUFFER))
+			__wt_stream_set_line_buffer(fh->fp);
+	}
+
+	/* Configure fallocate/posix_fallocate calls. */
+	__win_handle_allocate_configure(session, fh);
+
+directory_open:
+	/* Publish the handles (both invalid for a directory) and methods. */
+	fh->filehandle = filehandle;
+	fh->filehandle_secondary = filehandle_secondary;
+
+	fh->fh_advise = __win_handle_advise;
+	fh->fh_allocate = __win_handle_allocate;
+	fh->fh_close = __win_handle_close;
+	fh->fh_getc = __win_handle_getc;
+	fh->fh_lock = __win_handle_lock;
+	fh->fh_map = __wt_win_map;
+	fh->fh_map_discard = __wt_win_map_discard;
+	fh->fh_map_preload = __wt_win_map_preload;
+	fh->fh_map_unmap = __wt_win_map_unmap;
+	fh->fh_printf = __win_handle_printf;
+	fh->fh_read = __win_handle_read;
+	fh->fh_size = __win_handle_size;
+	fh->fh_sync = __win_handle_sync;
+	fh->fh_truncate = __win_handle_truncate;
+	fh->fh_write = __win_handle_write;
+
+	return (0);
+
+err:	if (filehandle != INVALID_HANDLE_VALUE)
+		(void)CloseHandle(filehandle);
+	if (filehandle_secondary != INVALID_HANDLE_VALUE)
+		(void)CloseHandle(filehandle_secondary);
+
+	return (ret);
+}
+
+/*
+ * __wt_os_win --
+ *	Initialize a MSVC configuration.
+ */
+int
+__wt_os_win(WT_SESSION_IMPL *session)
+{
+	WT_CONNECTION_IMPL *conn;
+
+	conn = S2C(session);
+
+	/* Initialize the Windows jump table. */
+	conn->file_directory_list = __wt_win_directory_list;
+	conn->file_directory_sync = __win_directory_sync;
+	conn->file_exist = __win_file_exist;
+	conn->file_remove = __win_file_remove;
+	conn->file_rename = __win_file_rename;
+	conn->file_size = __win_file_size;
+	conn->handle_open = __win_handle_open;
+
+	return (0);
+}
+
+/*
+ * __wt_os_win_cleanup --
+ *	Discard a Windows configuration; currently nothing to release.
+ */
+int
+__wt_os_win_cleanup(WT_SESSION_IMPL *session)
+{
+	WT_UNUSED(session);
+
+	return (0);
+}
diff --git a/src/third_party/wiredtiger/src/os_win/os_fsync.c b/src/third_party/wiredtiger/src/os_win/os_fsync.c
deleted file mode 100644
index c196fc6c06a..00000000000
--- a/src/third_party/wiredtiger/src/os_win/os_fsync.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
- * Copyright (c) 2008-2014 WiredTiger, Inc.
- *	All rights reserved.
- *
- * See the file LICENSE for redistribution information.
- */
-
-#include "wt_internal.h"
-
-/*
- * __wt_directory_sync_fh --
- *	Flush a directory file handle.
- */
-int
-__wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh)
-{
-	WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
-	WT_UNUSED(session);
-	WT_UNUSED(fh);
-	return (0);
-}
-
-/*
- * __wt_directory_sync --
- *	Flush a directory to ensure a file creation is durable.
- */
-int
-__wt_directory_sync(WT_SESSION_IMPL *session, const char *path)
-{
-	WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
-	WT_UNUSED(session);
-	WT_UNUSED(path);
-	return (0);
-}
-
-/*
- * __wt_fsync --
- *	Flush a file handle.
- */ -int -__wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_DECL_RET; - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: FlushFileBuffers", - fh->name)); - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) || - WT_STRING_MATCH(fh->name, WT_SINGLETHREAD, - strlen(WT_SINGLETHREAD))); - if ((ret = FlushFileBuffers(fh->filehandle)) == FALSE) - WT_RET_MSG(session, - __wt_errno(), "%s FlushFileBuffers error", fh->name); - - return (0); -} - -/* - * __wt_fsync_async -- - * Flush a file handle and don't wait for the result. - */ -int -__wt_fsync_async(WT_SESSION_IMPL *session, WT_FH *fh) -{ - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - WT_UNUSED(session); - WT_UNUSED(fh); - - return (0); -} diff --git a/src/third_party/wiredtiger/src/os_win/os_ftruncate.c b/src/third_party/wiredtiger/src/os_win/os_ftruncate.c deleted file mode 100644 index 88fcf9542c1..00000000000 --- a/src/third_party/wiredtiger/src/os_win/os_ftruncate.c +++ /dev/null @@ -1,37 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_ftruncate -- - * Truncate a file. 
- */ -int -__wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len) -{ - WT_DECL_RET; - LARGE_INTEGER largeint; - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - largeint.QuadPart = len; - - if ((ret = SetFilePointerEx( - fh->filehandle_secondary, largeint, NULL, FILE_BEGIN)) == FALSE) - WT_RET_MSG(session, __wt_errno(), "%s SetFilePointerEx error", - fh->name); - - ret = SetEndOfFile(fh->filehandle_secondary); - if (ret != FALSE) - return (0); - - if (GetLastError() == ERROR_USER_MAPPED_FILE) - return (EBUSY); - - WT_RET_MSG(session, __wt_errno(), "%s SetEndOfFile error", fh->name); -} diff --git a/src/third_party/wiredtiger/src/os_win/os_getenv.c b/src/third_party/wiredtiger/src/os_win/os_getenv.c index c9084769cd5..9b297ac3a74 100644 --- a/src/third_party/wiredtiger/src/os_win/os_getenv.c +++ b/src/third_party/wiredtiger/src/os_win/os_getenv.c @@ -29,7 +29,7 @@ __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) ret = GetEnvironmentVariableA(variable, *envp, size); /* We expect the number of bytes not including nul terminator. */ if ((ret + 1) != size) - WT_RET_MSG(session, __wt_errno(), + WT_RET_MSG(session, __wt_getlasterror(), "GetEnvironmentVariableA failed: %s", variable); return (0); diff --git a/src/third_party/wiredtiger/src/os_win/os_map.c b/src/third_party/wiredtiger/src/os_win/os_map.c index dc040b4fa54..b043f9c9923 100644 --- a/src/third_party/wiredtiger/src/os_win/os_map.c +++ b/src/third_party/wiredtiger/src/os_win/os_map.c @@ -9,102 +9,110 @@ #include "wt_internal.h" /* - * __wt_mmap -- + * __wt_win_map -- * Map a file into memory. 
*/ int -__wt_mmap(WT_SESSION_IMPL *session, WT_FH *fh, void *mapp, size_t *lenp, - void** mappingcookie) +__wt_win_map(WT_SESSION_IMPL *session, + WT_FH *fh, void *mapp, size_t *lenp, void **mappingcookie) { + WT_DECL_RET; + size_t len; + wt_off_t file_size; void *map; - size_t orig_size; /* - * Record the current size and only map and set that as the length, it - * could change between the map call and when we set the return length. - * For the same reason we could actually map past the end of the file; - * we don't read bytes past the end of the file though, so as long as - * the map call succeeds, it's all OK. + * There's no locking here to prevent the underlying file from changing + * underneath us, our caller needs to ensure consistency of the mapped + * region vs. any other file activity. */ - orig_size = (size_t)fh->size; + WT_RET(__wt_filesize(session, fh, &file_size)); + len = (size_t)file_size; + + (void)__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: memory-map: %" WT_SIZET_FMT " bytes", fh->name, len); + *mappingcookie = CreateFileMappingA(fh->filehandle, NULL, PAGE_READONLY, 0, 0, NULL); if (*mappingcookie == NULL) - WT_RET_MSG(session, __wt_errno(), - "%s CreateFileMapping error: failed to map %" - WT_SIZET_FMT " bytes", - fh->name, orig_size); + WT_RET_MSG(session, __wt_getlasterror(), + "%s: memory-map: CreateFileMappingA", fh->name); - if ((map = MapViewOfFile( - *mappingcookie, FILE_MAP_READ, 0, 0, orig_size)) == NULL) { + if ((map = + MapViewOfFile(*mappingcookie, FILE_MAP_READ, 0, 0, len)) == NULL) { + /* Retrieve the error before cleaning up. 
*/ + ret = __wt_getlasterror(); CloseHandle(*mappingcookie); *mappingcookie = NULL; - WT_RET_MSG(session, __wt_errno(), - "%s map error: failed to map %" WT_SIZET_FMT " bytes", - fh->name, orig_size); + WT_RET_MSG(session, ret, + "%s: memory-map: MapViewOfFile", fh->name); } - (void)__wt_verbose(session, WT_VERB_FILEOPS, - "%s: MapViewOfFile %p: %" WT_SIZET_FMT " bytes", - fh->name, map, orig_size); *(void **)mapp = map; - *lenp = orig_size; + *lenp = len; return (0); } /* - * __wt_mmap_preload -- + * __wt_win_map_preload -- * Cause a section of a memory map to be faulted in. */ int -__wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size) +__wt_win_map_preload( + WT_SESSION_IMPL *session, WT_FH *fh, const void *p, size_t size) { WT_UNUSED(session); + WT_UNUSED(fh); WT_UNUSED(p); WT_UNUSED(size); - return (0); + return (ENOTSUP); } /* - * __wt_mmap_discard -- + * __wt_win_map_discard -- * Discard a chunk of the memory map. */ int -__wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size) +__wt_win_map_discard(WT_SESSION_IMPL *session, WT_FH *fh, void *p, size_t size) { WT_UNUSED(session); + WT_UNUSED(fh); WT_UNUSED(p); WT_UNUSED(size); - return (0); + + return (ENOTSUP); } /* - * __wt_munmap -- + * __wt_win_map_unmap -- * Remove a memory mapping. 
*/ int -__wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, - void** mappingcookie) +__wt_win_map_unmap(WT_SESSION_IMPL *session, + WT_FH *fh, void *map, size_t len, void **mappingcookie) { - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, - "%s: UnmapViewOfFile %p: %" WT_SIZET_FMT " bytes", - fh->name, map, len)); + WT_DECL_RET; + + (void)__wt_verbose(session, WT_VERB_HANDLEOPS, + "%s: memory-unmap: %" WT_SIZET_FMT " bytes", fh->name, len); + + WT_ASSERT(session, *mappingcookie != NULL); if (UnmapViewOfFile(map) == 0) { - WT_RET_MSG(session, __wt_errno(), - "%s UnmapViewOfFile error: failed to unmap %" WT_SIZET_FMT - " bytes", - fh->name, len); + ret = __wt_getlasterror(); + __wt_err(session, ret, + "%s: memory-unmap: UnmapViewOfFile", fh->name); } if (CloseHandle(*mappingcookie) == 0) { - WT_RET_MSG(session, __wt_errno(), - "CloseHandle: MapViewOfFile: %s", fh->name); + ret = __wt_getlasterror(); + __wt_err(session, ret, + "%s: memory-unmap: CloseHandle", fh->name); } - *mappingcookie = 0; + *mappingcookie = NULL; - return (0); + return (ret); } diff --git a/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c index 14bac2a99d9..af4a5035076 100644 --- a/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c +++ b/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c @@ -103,7 +103,7 @@ __wt_cond_wait_signal( if ((err = GetLastError()) == ERROR_TIMEOUT) *signalled = false; else - ret = __wt_errno(); + ret = __wt_getlasterror(); } else ret = 0; diff --git a/src/third_party/wiredtiger/src/os_win/os_once.c b/src/third_party/wiredtiger/src/os_win/os_once.c index 9ea3fe044eb..347d1883cca 100644 --- a/src/third_party/wiredtiger/src/os_win/os_once.c +++ b/src/third_party/wiredtiger/src/os_win/os_once.c @@ -32,7 +32,7 @@ BOOL CALLBACK _wt_init_once_callback( * One-time initialization per process. 
*/ int -__wt_once(void(*init_routine)(void)) +__wt_once(void (*init_routine)(void)) { INIT_ONCE once_control = INIT_ONCE_STATIC_INIT; PVOID lpContext = NULL; diff --git a/src/third_party/wiredtiger/src/os_win/os_open.c b/src/third_party/wiredtiger/src/os_win/os_open.c deleted file mode 100644 index f10582c5bd1..00000000000 --- a/src/third_party/wiredtiger/src/os_win/os_open.c +++ /dev/null @@ -1,266 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_open -- - * Open a file handle. - */ -int -__wt_open(WT_SESSION_IMPL *session, - const char *name, bool ok_create, bool exclusive, int dio_type, WT_FH **fhp) -{ - DWORD dwCreationDisposition; - HANDLE filehandle, filehandle_secondary; - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - WT_FH *fh, *tfh; - uint64_t bucket, hash; - int f, share_mode; - bool direct_io, matched; - char *path; - - conn = S2C(session); - fh = NULL; - path = NULL; - filehandle = INVALID_HANDLE_VALUE; - filehandle_secondary = INVALID_HANDLE_VALUE; - direct_io = false; - hash = __wt_hash_city64(name, strlen(name)); - bucket = hash % WT_HASH_ARRAY_SIZE; - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: open", name)); - - /* Increment the reference count if we already have the file open. */ - matched = false; - __wt_spin_lock(session, &conn->fh_lock); - TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq) - if (strcmp(name, tfh->name) == 0) { - ++tfh->ref; - *fhp = tfh; - matched = true; - break; - } - __wt_spin_unlock(session, &conn->fh_lock); - if (matched) - return (0); - - /* For directories, create empty file handles with invalid handles */ - if (dio_type == WT_FILE_TYPE_DIRECTORY) { - goto setupfh; - } - - WT_RET(__wt_filename(session, name, &path)); - - /* - * If this is a read-only connection, open all files read-only - * except the lock file. 
- */ - if (F_ISSET(conn, WT_CONN_READONLY) && - !WT_STRING_MATCH(name, WT_SINGLETHREAD, - strlen(WT_SINGLETHREAD))) - share_mode = FILE_SHARE_READ; - else - share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE; - - /* - * Security: - * The application may spawn a new process, and we don't want another - * process to have access to our file handles. - * - * TODO: Set tighter file permissions but set bInheritHandle to false - * to prevent inheritance - */ - - f = FILE_ATTRIBUTE_NORMAL; - - dwCreationDisposition = 0; - if (ok_create) { - WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) || - WT_STRING_MATCH(name, WT_SINGLETHREAD, - strlen(WT_SINGLETHREAD))); - dwCreationDisposition = CREATE_NEW; - if (exclusive) - dwCreationDisposition = CREATE_ALWAYS; - } else - dwCreationDisposition = OPEN_EXISTING; - - /* - * direct_io means no OS file caching. This requires aligned buffer - * allocations like O_DIRECT. - */ - if (dio_type && FLD_ISSET(conn->direct_io, dio_type)) { - f |= FILE_FLAG_NO_BUFFERING; - direct_io = true; - } - - /* FILE_FLAG_WRITE_THROUGH does not require aligned buffers */ - if (dio_type && FLD_ISSET(conn->write_through, dio_type)) { - f |= FILE_FLAG_WRITE_THROUGH; - } - - if (dio_type == WT_FILE_TYPE_LOG && - FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) { - f |= FILE_FLAG_WRITE_THROUGH; - } - - /* Disable read-ahead on trees: it slows down random read workloads. */ - if (dio_type == WT_FILE_TYPE_DATA || - dio_type == WT_FILE_TYPE_CHECKPOINT) - f |= FILE_FLAG_RANDOM_ACCESS; - - filehandle = CreateFileA(path, - (GENERIC_READ | GENERIC_WRITE), - share_mode, - NULL, - dwCreationDisposition, - f, - NULL); - if (filehandle == INVALID_HANDLE_VALUE) { - if (GetLastError() == ERROR_FILE_EXISTS && ok_create) - filehandle = CreateFileA(path, - (GENERIC_READ | GENERIC_WRITE), - share_mode, - NULL, - OPEN_EXISTING, - f, - NULL); - - if (filehandle == INVALID_HANDLE_VALUE) - WT_ERR_MSG(session, __wt_errno(), - direct_io ? 
- "%s: open failed with direct I/O configured, some " - "filesystem types do not support direct I/O" : - "%s", path); - } - - /* - * Open a second handle to file to support allocation/truncation - * concurrently with reads on the file. Writes would also move the file - * pointer. - */ - filehandle_secondary = CreateFileA(path, - (GENERIC_READ | GENERIC_WRITE), - share_mode, - NULL, - OPEN_EXISTING, - f, - NULL); - if (filehandle == INVALID_HANDLE_VALUE) - WT_ERR_MSG(session, __wt_errno(), - "open failed for secondary handle: %s", path); - -setupfh: - WT_ERR(__wt_calloc_one(session, &fh)); - WT_ERR(__wt_strdup(session, name, &fh->name)); - fh->name_hash = hash; - fh->filehandle = filehandle; - fh->filehandle_secondary = filehandle_secondary; - fh->ref = 1; - fh->direct_io = direct_io; - - /* Set the file's size. */ - if (dio_type != WT_FILE_TYPE_DIRECTORY) - WT_ERR(__wt_filesize(session, fh, &fh->size)); - - /* Configure file extension. */ - if (dio_type == WT_FILE_TYPE_DATA || - dio_type == WT_FILE_TYPE_CHECKPOINT) - fh->extend_len = conn->data_extend_len; - - /* Configure fallocate/posix_fallocate calls. */ - __wt_fallocate_config(session, fh); - - /* - * Repeat the check for a match, but then link onto the database's list - * of files. 
- */ - matched = false; - __wt_spin_lock(session, &conn->fh_lock); - TAILQ_FOREACH(tfh, &conn->fhhash[bucket], hashq) - if (strcmp(name, tfh->name) == 0) { - ++tfh->ref; - *fhp = tfh; - matched = true; - break; - } - if (!matched) { - WT_CONN_FILE_INSERT(conn, fh, bucket); - (void)__wt_atomic_add32(&conn->open_file_count, 1); - - *fhp = fh; - } - __wt_spin_unlock(session, &conn->fh_lock); - if (matched) { -err: if (fh != NULL) { - __wt_free(session, fh->name); - __wt_free(session, fh); - } - if (filehandle != INVALID_HANDLE_VALUE) - (void)CloseHandle(filehandle); - if (filehandle_secondary != INVALID_HANDLE_VALUE) - (void)CloseHandle(filehandle_secondary); - } - - __wt_free(session, path); - return (ret); -} - -/* - * __wt_close -- - * Close a file handle. - */ -int -__wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) -{ - WT_CONNECTION_IMPL *conn; - WT_DECL_RET; - WT_FH *fh; - uint64_t bucket; - - conn = S2C(session); - - if (*fhp == NULL) - return (0); - fh = *fhp; - *fhp = NULL; - - __wt_spin_lock(session, &conn->fh_lock); - if (fh == NULL || fh->ref == 0 || --fh->ref > 0) { - __wt_spin_unlock(session, &conn->fh_lock); - return (0); - } - - /* Remove from the list. */ - bucket = fh->name_hash % WT_HASH_ARRAY_SIZE; - WT_CONN_FILE_REMOVE(conn, fh, bucket); - (void)__wt_atomic_sub32(&conn->open_file_count, 1); - - __wt_spin_unlock(session, &conn->fh_lock); - - /* Discard the memory. 
- * Note: For directories, we do not open valid directory handles on - * windows since it is not possible to sync a directory - */ - if (fh->filehandle != INVALID_HANDLE_VALUE && - CloseHandle(fh->filehandle) == 0) { - ret = __wt_errno(); - __wt_err(session, ret, "CloseHandle: %s", fh->name); - } - - if (fh->filehandle_secondary != INVALID_HANDLE_VALUE && - CloseHandle(fh->filehandle_secondary) == 0) { - ret = __wt_errno(); - __wt_err(session, ret, "CloseHandle: secondary: %s", fh->name); - } - - __wt_free(session, fh->name); - __wt_free(session, fh); - return (ret); -} diff --git a/src/third_party/wiredtiger/src/os_win/os_remove.c b/src/third_party/wiredtiger/src/os_win/os_remove.c deleted file mode 100644 index 84f1dd86674..00000000000 --- a/src/third_party/wiredtiger/src/os_win/os_remove.c +++ /dev/null @@ -1,71 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __remove_file_check -- - * Check if the file is currently open before removing it. - */ -static inline void -__remove_file_check(WT_SESSION_IMPL *session, const char *name) -{ -#ifdef HAVE_DIAGNOSTIC - WT_CONNECTION_IMPL *conn; - WT_FH *fh; - uint64_t bucket; - - conn = S2C(session); - WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY)); - fh = NULL; - bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE; - - /* - * Check if the file is open: it's an error if it is, since a higher - * level should have closed it before removing. - */ - __wt_spin_lock(session, &conn->fh_lock); - TAILQ_FOREACH(fh, &conn->fhhash[bucket], hashq) - if (strcmp(name, fh->name) == 0) - break; - __wt_spin_unlock(session, &conn->fh_lock); - - WT_ASSERT(session, fh == NULL); -#else - WT_UNUSED(session); - WT_UNUSED(name); -#endif -} - -/* - * __wt_remove -- - * Remove a file. 
- */ -int -__wt_remove(WT_SESSION_IMPL *session, const char *name) -{ - WT_DECL_RET; - char *path; - uint32_t lasterror; - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, "%s: remove", name)); - - __remove_file_check(session, name); - - WT_RET(__wt_filename(session, name, &path)); - - if ((ret = DeleteFileA(path)) == FALSE) - lasterror = __wt_errno(); - - __wt_free(session, path); - - if (ret != FALSE) - return (0); - - WT_RET_MSG(session, lasterror, "%s: remove", name); -} diff --git a/src/third_party/wiredtiger/src/os_win/os_rename.c b/src/third_party/wiredtiger/src/os_win/os_rename.c deleted file mode 100644 index b4be2dba24c..00000000000 --- a/src/third_party/wiredtiger/src/os_win/os_rename.c +++ /dev/null @@ -1,53 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_rename -- - * Rename a file. - */ -int -__wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to) -{ - WT_DECL_RET; - uint32_t lasterror; - char *from_path, *to_path; - - WT_RET(__wt_verbose( - session, WT_VERB_FILEOPS, "rename %s to %s", from, to)); - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY)); - from_path = to_path = NULL; - - WT_RET(__wt_filename(session, from, &from_path)); - WT_TRET(__wt_filename(session, to, &to_path)); - - /* - * Check if file exists since Windows does not override the file if - * it exists. 
- */ - if ((ret = GetFileAttributesA(to_path)) != INVALID_FILE_ATTRIBUTES) { - if ((ret = DeleteFileA(to_path)) == FALSE) { - lasterror = __wt_errno(); - goto err; - } - } - - if ((MoveFileA(from_path, to_path)) == FALSE) - lasterror = __wt_errno(); - -err: - __wt_free(session, from_path); - __wt_free(session, to_path); - - if (ret != FALSE) - return (0); - - WT_RET_MSG(session, lasterror, "MoveFile %s to %s", from, to); -} diff --git a/src/third_party/wiredtiger/src/os_win/os_rw.c b/src/third_party/wiredtiger/src/os_win/os_rw.c deleted file mode 100644 index a9537a648f9..00000000000 --- a/src/third_party/wiredtiger/src/os_win/os_rw.c +++ /dev/null @@ -1,102 +0,0 @@ -/*- - * Copyright (c) 2014-2016 MongoDB, Inc. - * Copyright (c) 2008-2014 WiredTiger, Inc. - * All rights reserved. - * - * See the file LICENSE for redistribution information. - */ - -#include "wt_internal.h" - -/* - * __wt_read -- - * Read a chunk. - */ -int -__wt_read( - WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, size_t len, void *buf) -{ - DWORD chunk; - DWORD nr; - uint8_t *addr; - OVERLAPPED overlapped = { 0 }; - - nr = 0; - - WT_STAT_FAST_CONN_INCR(session, read_io); - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, - "%s: read %" WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, len, (uintmax_t)offset)); - - /* Assert direct I/O is aligned and a multiple of the alignment. */ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break reads larger than 1GB into 1GB chunks. 
*/ - for (addr = buf; len > 0; addr += nr, len -= (size_t)nr, offset += nr) { - chunk = (DWORD)WT_MIN(len, WT_GIGABYTE); - overlapped.Offset = UINT32_MAX & offset; - overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); - - if (!ReadFile(fh->filehandle, addr, chunk, &nr, &overlapped)) - WT_RET_MSG(session, nr == 0 ? WT_ERROR : __wt_errno(), - "%s read error: failed to read %" WT_SIZET_FMT - " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} - -/* - * __wt_write -- - * Write a chunk. - */ -int -__wt_write(WT_SESSION_IMPL *session, - WT_FH *fh, wt_off_t offset, size_t len, const void *buf) -{ - DWORD chunk; - DWORD nw; - const uint8_t *addr; - OVERLAPPED overlapped = { 0 }; - - nw = 0; - - WT_STAT_FAST_CONN_INCR(session, write_io); - - WT_RET(__wt_verbose(session, WT_VERB_FILEOPS, - "%s: write %" WT_SIZET_FMT " bytes at offset %" PRIuMAX, - fh->name, len, (uintmax_t)offset)); - - WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY) || - WT_STRING_MATCH(fh->name, WT_SINGLETHREAD, - strlen(WT_SINGLETHREAD))); - /* Assert direct I/O is aligned and a multiple of the alignment. */ - WT_ASSERT(session, - !fh->direct_io || - S2C(session)->buffer_alignment == 0 || - (!((uintptr_t)buf & - (uintptr_t)(S2C(session)->buffer_alignment - 1)) && - len >= S2C(session)->buffer_alignment && - len % S2C(session)->buffer_alignment == 0)); - - /* Break writes larger than 1GB into 1GB chunks. 
*/ - for (addr = buf; len > 0; addr += nw, len -= (size_t)nw, offset += nw) { - chunk = (DWORD)WT_MIN(len, WT_GIGABYTE); - overlapped.Offset = UINT32_MAX & offset; - overlapped.OffsetHigh = UINT32_MAX & (offset >> 32); - - if (!WriteFile(fh->filehandle, addr, chunk, &nw, &overlapped)) - WT_RET_MSG(session, __wt_errno(), - "%s write error: failed to write %" WT_SIZET_FMT - " bytes at offset %" PRIuMAX, - fh->name, chunk, (uintmax_t)offset); - } - return (0); -} diff --git a/src/third_party/wiredtiger/src/os_win/os_setvbuf.c b/src/third_party/wiredtiger/src/os_win/os_setvbuf.c new file mode 100644 index 00000000000..b38ab1ebee2 --- /dev/null +++ b/src/third_party/wiredtiger/src/os_win/os_setvbuf.c @@ -0,0 +1,38 @@ +/*- + * Copyright (c) 2014-2016 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_stream_set_line_buffer -- + * Set line buffering on a stream. + */ +void +__wt_stream_set_line_buffer(FILE *fp) +{ + /* + * This function exists because MSVC doesn't support buffer sizes of 0 + * to the setvbuf call. To avoid re-introducing the bug, we have helper + * functions and disallow calling setvbuf directly in WiredTiger code. + * + * Additionally, MSVC doesn't support line buffering, the result is the + * same as full-buffering. We assume our caller wants immediate output, + * set no-buffering instead. + */ + __wt_stream_set_no_buffer(fp); +} + +/* + * __wt_stream_set_no_buffer -- + * Turn off buffering on a stream. 
+ */ +void +__wt_stream_set_no_buffer(FILE *fp) +{ + (void)setvbuf(fp, NULL, _IONBF, 0); +} diff --git a/src/third_party/wiredtiger/src/os_win/os_sleep.c b/src/third_party/wiredtiger/src/os_win/os_sleep.c index 1d4b316488a..1cb61f7c4aa 100644 --- a/src/third_party/wiredtiger/src/os_win/os_sleep.c +++ b/src/third_party/wiredtiger/src/os_win/os_sleep.c @@ -15,11 +15,15 @@ void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) { + DWORD dwMilliseconds; + /* * If the caller wants a small pause, set to our * smallest granularity. */ if (seconds == 0 && micro_seconds < WT_THOUSAND) micro_seconds = WT_THOUSAND; - Sleep(seconds * WT_THOUSAND + micro_seconds / WT_THOUSAND); + dwMilliseconds = (DWORD) + (seconds * WT_THOUSAND + micro_seconds / WT_THOUSAND); + Sleep(dwMilliseconds); } diff --git a/src/third_party/wiredtiger/src/os_win/os_thread.c b/src/third_party/wiredtiger/src/os_win/os_thread.c index 3be0ccb9393..94c5a8b0ab2 100644 --- a/src/third_party/wiredtiger/src/os_win/os_thread.c +++ b/src/third_party/wiredtiger/src/os_win/os_thread.c @@ -21,7 +21,7 @@ __wt_thread_create(WT_SESSION_IMPL *session, if (*tidret != 0) return (0); - WT_RET_MSG(session, errno, "_beginthreadex"); + WT_RET_MSG(session, __wt_errno, "thread create: _beginthreadex"); } /* @@ -37,12 +37,13 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) /* * If we fail to wait, we will leak handles so do not continue */ - WT_PANIC_RET(session, ret == WAIT_FAILED ? __wt_errno() : ret, - "Wait for thread join failed"); + WT_PANIC_RET(session, + ret == WAIT_FAILED ? __wt_getlasterror() : ret, + "thread join: WaitForSingleObject"); if (CloseHandle(tid) == 0) { - WT_RET_MSG(session, __wt_errno(), - "CloseHandle: thread join"); + WT_RET_MSG(session, + __wt_getlasterror(), "thread join: CloseHandle"); } return (0); @@ -53,7 +54,7 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) * Fill in a printable version of the process and thread IDs. 
*/ void -__wt_thread_id(char* buf, size_t buflen) +__wt_thread_id(char *buf, size_t buflen) { (void)snprintf(buf, buflen, "%" PRIu64 ":%" PRIu64, diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index a69f335c9b3..26123f6b66d 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -2409,8 +2409,8 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) /* Finalize the header information and write the page. */ dsk->recno = last->recno; dsk->u.entries = r->entries; - dsk->mem_size = - r->disk_image.size = WT_PTRDIFF32(r->first_free, dsk); + dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); + r->disk_image.size = dsk->mem_size; WT_RET( __rec_split_write(session, r, last, &r->disk_image, false)); @@ -2790,9 +2790,9 @@ no_slots: WT_STAT_FAST_DATA_INCR(session, compress_raw_fail); dsk->recno = last->recno; - dsk->mem_size = - r->disk_image.size = WT_PTRDIFF32(r->first_free, dsk); + dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); dsk->u.entries = r->entries; + r->disk_image.size = dsk->mem_size; r->entries = 0; r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk); @@ -2972,7 +2972,8 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) dsk = r->disk_image.mem; dsk->recno = bnd->recno; dsk->u.entries = r->entries; - dsk->mem_size = r->disk_image.size = WT_PTRDIFF32(r->first_free, dsk); + dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk); + r->disk_image.size = dsk->mem_size; /* If this is a checkpoint, we're done, otherwise write the page. */ return (__rec_is_checkpoint(session, r, bnd) ? 
@@ -6086,8 +6087,9 @@ __rec_cell_build_ovfl(WT_SESSION_IMPL *session, dsk->u.datalen = (uint32_t)kv->buf.size; memcpy(WT_PAGE_HEADER_BYTE(btree, dsk), kv->buf.data, kv->buf.size); - dsk->mem_size = tmp->size = + dsk->mem_size = WT_PAGE_HEADER_BYTE_SIZE(btree) + (uint32_t)kv->buf.size; + tmp->size = dsk->mem_size; /* Write the buffer. */ addr = buf; diff --git a/src/third_party/wiredtiger/src/session/session_compact.c b/src/third_party/wiredtiger/src/session/session_compact.c index 2a53ad58f52..3f7b34d132f 100644 --- a/src/third_party/wiredtiger/src/session/session_compact.c +++ b/src/third_party/wiredtiger/src/session/session_compact.c @@ -267,8 +267,9 @@ __wt_session_compact( session = (WT_SESSION_IMPL *)wt_session; SESSION_API_CALL(session, compact, config, cfg); + /* In-memory is already as compact as it's going to get. */ if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) - WT_ERR(ENOTSUP); + goto err; /* Disallow objects in the WiredTiger name space. */ WT_ERR(__wt_str_name_check(session, uri)); diff --git a/src/third_party/wiredtiger/src/support/err.c b/src/third_party/wiredtiger/src/support/err.c index 875bd3efcf3..f64492f1561 100644 --- a/src/third_party/wiredtiger/src/support/err.c +++ b/src/third_party/wiredtiger/src/support/err.c @@ -16,12 +16,15 @@ static int __handle_error_default(WT_EVENT_HANDLER *handler, WT_SESSION *wt_session, int error, const char *errmsg) { + WT_SESSION_IMPL *session; + WT_UNUSED(handler); - WT_UNUSED(wt_session); WT_UNUSED(error); - WT_RET(__wt_fprintf(stderr, "%s\n", errmsg)); - WT_RET(__wt_fflush(stderr)); + session = (WT_SESSION_IMPL *)wt_session; + + WT_RET(__wt_fprintf(session, WT_STDERR(session), "%s\n", errmsg)); + WT_RET(__wt_fsync(session, WT_STDERR(session), true)); return (0); } @@ -33,11 +36,13 @@ static int __handle_message_default(WT_EVENT_HANDLER *handler, WT_SESSION *wt_session, const char *message) { + WT_SESSION_IMPL *session; + WT_UNUSED(handler); - WT_UNUSED(wt_session); - WT_RET(__wt_fprintf(stdout, "%s\n", 
message)); - WT_RET(__wt_fflush(stdout)); + session = (WT_SESSION_IMPL *)wt_session; + WT_RET(__wt_fprintf(session, WT_STDOUT(session), "%s\n", message)); + WT_RET(__wt_fsync(session, WT_STDOUT(session), true)); return (0); } @@ -175,13 +180,19 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, * example, we can end up here without a session.) */ if (session == NULL) { - WT_RET(__wt_fprintf(stderr, + if (fprintf(stderr, "WiredTiger Error%s%s: ", error == 0 ? "" : ": ", - error == 0 ? "" : __wt_strerror(session, error, NULL, 0))); - WT_RET(__wt_vfprintf(stderr, fmt, ap)); - WT_RET(__wt_fprintf(stderr, "\n")); - return (__wt_fflush(stderr)); + error == 0 ? "" : + __wt_strerror(session, error, NULL, 0)) < 0) + ret = EIO; + if (vfprintf(stderr, fmt, ap) < 0) + ret = EIO; + if (fprintf(stderr, "\n") < 0) + ret = EIO; + if (fflush(stderr) != 0) + ret = EIO; + return (ret); } p = s; diff --git a/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c b/src/third_party/wiredtiger/src/support/mtx_rw.c index b6876cdfbdc..b6876cdfbdc 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c +++ b/src/third_party/wiredtiger/src/support/mtx_rw.c diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 1eebc9e9d04..fdbda26b781 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -1246,7 +1246,7 @@ __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]) if (!F_ISSET(S2C(session), WT_CONN_CKPT_SYNC)) return (0); - return (bm->sync(bm, session, false)); + return (bm->sync(bm, session, true)); } /* diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c index 37a6e0b3711..da2670fb344 100644 --- a/src/third_party/wiredtiger/src/txn/txn_log.c +++ b/src/third_party/wiredtiger/src/txn/txn_log.c @@ -10,7 +10,6 @@ /* Cookie passed to __txn_printlog. 
*/ typedef struct { - FILE *out; uint32_t flags; } WT_TXN_PRINTLOG_ARGS; @@ -69,28 +68,28 @@ err: __wt_buf_free(session, &key); * Print a commit log record. */ static int -__txn_commit_printlog( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, - uint32_t flags) +__txn_commit_printlog(WT_SESSION_IMPL *session, + const uint8_t **pp, const uint8_t *end, uint32_t flags) { bool firstrecord; firstrecord = true; - WT_RET(__wt_fprintf(out, " \"ops\": [\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"ops\": [\n")); /* The logging subsystem zero-pads records. */ while (*pp < end && **pp) { if (!firstrecord) - WT_RET(__wt_fprintf(out, ",\n")); - WT_RET(__wt_fprintf(out, " {")); + WT_RET(__wt_fprintf( + session, WT_STDOUT(session), ",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " {")); firstrecord = false; - WT_RET(__wt_txn_op_printlog(session, pp, end, out, flags)); - WT_RET(__wt_fprintf(out, "\n }")); + WT_RET(__wt_txn_op_printlog(session, pp, end, flags)); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), "\n }")); } - WT_RET(__wt_fprintf(out, "\n ]\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), "\n ]\n")); return (0); } @@ -465,7 +464,6 @@ __txn_printlog(WT_SESSION_IMPL *session, WT_ITEM *rawrec, WT_LSN *lsnp, WT_LSN *next_lsnp, void *cookie, int firstrecord) { - FILE *out; WT_LOG_RECORD *logrec; WT_TXN_PRINTLOG_ARGS *args; const uint8_t *end, *p; @@ -477,7 +475,6 @@ __txn_printlog(WT_SESSION_IMPL *session, WT_UNUSED(next_lsnp); args = cookie; - out = args->out; p = WT_LOG_SKIP_HEADER(rawrec->data); end = (const uint8_t *)rawrec->data + rawrec->size; @@ -488,16 +485,16 @@ __txn_printlog(WT_SESSION_IMPL *session, WT_RET(__wt_logrec_read(session, &p, end, &rectype)); if (!firstrecord) - WT_RET(__wt_fprintf(out, ",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), ",\n")); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " { \"lsn\" : [%" PRIu32 ",%" PRIu32 "],\n", 
lsnp->l.file, lsnp->l.offset)); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"hdr_flags\" : \"%s\",\n", compressed ? "compressed" : "")); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"rec_len\" : %" PRIu32 ",\n", logrec->len)); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"mem_len\" : %" PRIu32 ",\n", compressed ? logrec->mem_len : logrec->len)); @@ -505,40 +502,44 @@ __txn_printlog(WT_SESSION_IMPL *session, case WT_LOGREC_CHECKPOINT: WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p), WT_UNCHECKED_STRING(II), &lsnfile, &lsnoffset)); - WT_RET(__wt_fprintf(out, " \"type\" : \"checkpoint\",\n")); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), + " \"type\" : \"checkpoint\",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"ckpt_lsn\" : [%" PRIu32 ",%" PRIu32 "]\n", lsnfile, lsnoffset)); break; case WT_LOGREC_COMMIT: WT_RET(__wt_vunpack_uint(&p, WT_PTRDIFF(end, p), &txnid)); - WT_RET(__wt_fprintf(out, " \"type\" : \"commit\",\n")); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), + " \"type\" : \"commit\",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"txnid\" : %" PRIu64 ",\n", txnid)); - WT_RET(__txn_commit_printlog(session, &p, end, out, - args->flags)); + WT_RET(__txn_commit_printlog(session, &p, end, args->flags)); break; case WT_LOGREC_FILE_SYNC: WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p), WT_UNCHECKED_STRING(Ii), &fileid, &start)); - WT_RET(__wt_fprintf(out, " \"type\" : \"file_sync\",\n")); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), + " \"type\" : \"file_sync\",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"fileid\" : %" PRIu32 ",\n", fileid)); - WT_RET(__wt_fprintf(out, + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " \"start\" : %" PRId32 "\n", start)); break; case WT_LOGREC_MESSAGE: 
WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p), WT_UNCHECKED_STRING(S), &msg)); - WT_RET(__wt_fprintf(out, " \"type\" : \"message\",\n")); - WT_RET(__wt_fprintf(out, " \"message\" : \"%s\"\n", msg)); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), + " \"type\" : \"message\",\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), + " \"message\" : \"%s\"\n", msg)); break; } - WT_RET(__wt_fprintf(out, " }")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), " }")); return (0); } @@ -548,19 +549,18 @@ __txn_printlog(WT_SESSION_IMPL *session, * Print the log in a human-readable format. */ int -__wt_txn_printlog(WT_SESSION *wt_session, FILE *out, uint32_t flags) +__wt_txn_printlog(WT_SESSION *wt_session, uint32_t flags) { WT_SESSION_IMPL *session; WT_TXN_PRINTLOG_ARGS args; session = (WT_SESSION_IMPL *)wt_session; - args.out = out; args.flags = flags; - WT_RET(__wt_fprintf(out, "[\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), "[\n")); WT_RET(__wt_log_scan( session, NULL, WT_LOGSCAN_FIRST, __txn_printlog, &args)); - WT_RET(__wt_fprintf(out, "\n]\n")); + WT_RET(__wt_fprintf(session, WT_STDOUT(session), "\n]\n")); return (0); } diff --git a/src/third_party/wiredtiger/src/utilities/util_backup.c b/src/third_party/wiredtiger/src/utilities/util_backup.c index b3afc78e9e8..190c0878f38 100644 --- a/src/third_party/wiredtiger/src/utilities/util_backup.c +++ b/src/third_party/wiredtiger/src/utilities/util_backup.c @@ -8,12 +8,9 @@ #include "util.h" -static int copy(const char *, const char *); +static int copy(WT_SESSION *, const char *, const char *); static int usage(void); -#define CBUF_LEN (128 * 1024) /* Copy buffer and size. */ -static char *cbuf; - /* * append_target -- * Build a list of comma-separated targets. 
@@ -86,7 +83,7 @@ util_backup(WT_SESSION *session, int argc, char *argv[]) while ( (ret = cursor->next(cursor)) == 0 && (ret = cursor->get_key(cursor, &name)) == 0) - if ((ret = copy(name, directory)) != 0) + if ((ret = copy(session, directory, name)) != 0) goto err; if (ret == WT_NOTFOUND) ret = 0; @@ -98,97 +95,41 @@ util_backup(WT_SESSION *session, int argc, char *argv[]) } err: free(config); - free(cbuf); - return (ret); } static int -copy(const char *name, const char *directory) +copy(WT_SESSION *session, const char *directory, const char *name) { WT_DECL_RET; - ssize_t n; - int ifd, ofd; - - ret = 1; - ifd = ofd = -1; - - if (verbose && - printf("Backing up %s/%s to %s\n", home, name, directory) < 0) { - fprintf(stderr, "%s: %s\n", progname, strerror(errno)); - return (1); - } + size_t len; + char *to; - /* Allocate a large copy buffer (use it to build pathnames as well. */ - if (cbuf == NULL && (cbuf = malloc(CBUF_LEN)) == NULL) - goto memerr; - - /* Open the read file. */ - if (snprintf(cbuf, CBUF_LEN, "%s/%s", home, name) >= CBUF_LEN) - goto memerr; - if ((ifd = open(cbuf, O_BINARY | O_RDONLY, 0)) < 0) - goto readerr; + to = NULL; - /* Open the write file. */ - if (snprintf(cbuf, CBUF_LEN, "%s/%s", directory, name) >= CBUF_LEN) + /* Build the target pathname. */ + len = strlen(directory) + strlen(name) + 2; + if ((to = malloc(len)) == NULL) goto memerr; - if ((ofd = open( - cbuf, O_BINARY | O_CREAT | O_WRONLY | O_TRUNC, 0666)) < 0) - goto writerr; + (void)snprintf(to, len, "%s/%s", directory, name); - /* Copy the file. */ - while ((n = read(ifd, cbuf, CBUF_LEN)) > 0) - if (write(ofd, cbuf, (size_t)n) != n) - goto writerr; - if (n != 0) - goto readerr; - - /* - * Close file descriptors (forcing a flush on the write side), and - * check for any errors. 
- */ - ret = close(ifd); - ifd = -1; - if (ret != 0) - goto readerr; + if (verbose && printf("Backing up %s/%s to %s\n", home, name, to) < 0) { + fprintf(stderr, "%s: %s\n", progname, strerror(EIO)); + goto err; + } /* - * We need to know this file was successfully written, it's a backup. + * Use WiredTiger to copy the file: ensuring stability of the copied + * file on disk requires care, and WiredTiger knows how to do it. */ -#ifdef _WIN32 - if (FlushFileBuffers((HANDLE)_get_osfhandle(ofd)) == 0) { - DWORD err = GetLastError(); - ret = err; - goto writerr; - } -#else - if (fsync(ofd)) - goto writerr; -#endif - ret = close(ofd); - ofd = -1; - if (ret != 0) - goto writerr; - - /* Success. */ - ret = 0; + if ((ret = __wt_copy_and_sync(session, name, to)) != 0) + fprintf(stderr, "%s/%s to %s: backup copy: %s\n", + home, name, to, session->strerror(session, ret)); if (0) { -readerr: fprintf(stderr, - "%s: %s/%s: %s\n", progname, home, name, strerror(errno)); - } - if (0) { -writerr: fprintf(stderr, "%s: %s/%s: %s\n", - progname, directory, name, strerror(errno)); - } - if (0) { memerr: fprintf(stderr, "%s: %s\n", progname, strerror(errno)); } - - if (ifd >= 0) - (void)close(ifd); - if (ofd >= 0) - (void)close(ofd); +err: free(to); return (ret); } diff --git a/src/third_party/wiredtiger/src/utilities/util_load_json.c b/src/third_party/wiredtiger/src/utilities/util_load_json.c index 9349d39bb1e..3a1f847a95f 100644 --- a/src/third_party/wiredtiger/src/utilities/util_load_json.c +++ b/src/third_party/wiredtiger/src/utilities/util_load_json.c @@ -213,8 +213,7 @@ json_data(WT_SESSION *session, { WT_CURSOR *cursor; WT_DECL_RET; - size_t keystrlen; - ssize_t gotnolen; + size_t gotnolen, keystrlen; uint64_t gotno, recno; int nfield, nkeys, toktype, tret; bool isrec; @@ -274,9 +273,8 @@ json_data(WT_SESSION *session, /* Verify the dump has recnos in order. 
*/ recno++; gotno = __wt_strtouq(ins->tokstart, &endp, 0); - gotnolen = (endp - ins->tokstart); - if (recno != gotno || - ins->toklen != (size_t)gotnolen) { + gotnolen = (size_t)(endp - ins->tokstart); + if (recno != gotno || ins->toklen != gotnolen) { ret = util_err(session, 0, "%s: recno out of order", uri); goto err; diff --git a/src/third_party/wiredtiger/src/utilities/util_printlog.c b/src/third_party/wiredtiger/src/utilities/util_printlog.c index 9a2bdc8a9ba..e7fa2134934 100644 --- a/src/third_party/wiredtiger/src/utilities/util_printlog.c +++ b/src/third_party/wiredtiger/src/utilities/util_printlog.c @@ -41,7 +41,7 @@ util_printlog(WT_SESSION *session, int argc, char *argv[]) if (argc != 0) return (usage()); - ret = __wt_txn_printlog(session, stdout, flags); + ret = __wt_txn_printlog(session, flags); if (ret != 0) { fprintf(stderr, "%s: printlog failed: %s\n", diff --git a/src/third_party/wiredtiger/test/mciproject.yml b/src/third_party/wiredtiger/test/mciproject.yml index 49caa44438d..9abdf23ec3b 100644 --- a/src/third_party/wiredtiger/test/mciproject.yml +++ b/src/third_party/wiredtiger/test/mciproject.yml @@ -124,10 +124,10 @@ buildvariants: - name: compile-windows-alt - name: fops-windows -- name: osx-108 - display_name: OS X 10.8 +- name: osx-1010 + display_name: OS X 10.10 run_on: - - osx-108 + - osx-1010 expansions: smp_command: -j$(sysctl -n hw.logicalcpu) configure_env_vars: PATH=/opt/local/bin:$PATH diff --git a/src/third_party/wiredtiger/test/readonly/smoke.sh b/src/third_party/wiredtiger/test/readonly/smoke.sh index 740deb5743a..8dba513e7af 100755 --- a/src/third_party/wiredtiger/test/readonly/smoke.sh +++ b/src/third_party/wiredtiger/test/readonly/smoke.sh @@ -1,6 +1,6 @@ #!/bin/sh -trap 'chmod -R u+w WT_*; exit 0' 0 1 2 3 13 15 +trap 'chmod -R u+w WT_*' 0 1 2 3 13 15 set -e diff --git a/src/third_party/wiredtiger/test/recovery/random-abort.c b/src/third_party/wiredtiger/test/recovery/random-abort.c index f9c3ed28814..cd7d1b08708 100644 --- 
a/src/third_party/wiredtiger/test/recovery/random-abort.c +++ b/src/third_party/wiredtiger/test/recovery/random-abort.c @@ -110,7 +110,7 @@ fill_db(void) /* * Set to no buffering. */ - (void)setvbuf(fp, NULL, _IONBF, 0); + __wt_stream_set_no_buffer(fp); /* * Write data into the table until we are killed by the parent. diff --git a/src/third_party/wiredtiger/test/recovery/truncated-log.c b/src/third_party/wiredtiger/test/recovery/truncated-log.c index 67fdb932c27..e099873e5b9 100644 --- a/src/third_party/wiredtiger/test/recovery/truncated-log.c +++ b/src/third_party/wiredtiger/test/recovery/truncated-log.c @@ -107,7 +107,7 @@ fill_db(void) /* * Set to no buffering. */ - (void)setvbuf(fp, NULL, _IONBF, 0); + __wt_stream_set_no_buffer(fp); save_lsn.l.file = 0; /* diff --git a/src/third_party/wiredtiger/test/utility/test_util.i b/src/third_party/wiredtiger/test/utility/test_util.i index c5cebadcb5c..43982d9e4a1 100644 --- a/src/third_party/wiredtiger/test/utility/test_util.i +++ b/src/third_party/wiredtiger/test/utility/test_util.i @@ -101,13 +101,13 @@ testutil_die(int e, const char *fmt, ...) * Creates the full intended work directory in buffer. */ static inline void -testutil_work_dir_from_path(char *buffer, size_t inputSize, const char *dir) +testutil_work_dir_from_path(char *buffer, size_t len, const char *dir) { /* If no directory is provided, use the default. */ if (dir == NULL) dir = DEFAULT_DIR; - if (inputSize < strlen(dir) + 1) + if (len < strlen(dir) + 1) testutil_die(ENOMEM, "Not enough memory in buffer for directory %s", dir); @@ -116,55 +116,48 @@ testutil_work_dir_from_path(char *buffer, size_t inputSize, const char *dir) /* * testutil_clean_work_dir -- - * Remove any existing work directories, can optionally fail on error + * Remove the work directory. */ static inline void testutil_clean_work_dir(char *dir) { - size_t inputSize; + size_t len; int ret; - bool exist; - char *buffer; + char *buf; /* Additional bytes for the Windows rd command. 
*/ - inputSize = strlen(dir) + sizeof(RM_COMMAND); - if ((buffer = malloc(inputSize)) == NULL) + len = strlen(dir) + strlen(RM_COMMAND) + 1; + if ((buf = malloc(len)) == NULL) testutil_die(ENOMEM, "Failed to allocate memory"); - snprintf(buffer, inputSize, "%s%s", RM_COMMAND, dir); + snprintf(buf, len, "%s%s", RM_COMMAND, dir); - exist = 0; - if ((ret = __wt_exist(NULL, dir, &exist)) != 0) - testutil_die(ret, - "Unable to check if directory exists"); - if (exist == 1 && (ret = system(buffer)) != 0) - testutil_die(ret, - "System call to remove directory failed"); - free(buffer); + if ((ret = system(buf)) != 0 && ret != ENOENT) + testutil_die(ret, "%s", buf); + free(buf); } /* * testutil_make_work_dir -- - * Delete the existing work directory if it exists, then create a new one. + * Delete the existing work directory, then create a new one. */ static inline void testutil_make_work_dir(char *dir) { - size_t inputSize; + size_t len; int ret; - char *buffer; + char *buf; testutil_clean_work_dir(dir); /* Additional bytes for the mkdir command */ - inputSize = strlen(dir) + sizeof(MKDIR_COMMAND); - if ((buffer = malloc(inputSize)) == NULL) + len = strlen(dir) + strlen(MKDIR_COMMAND) + 1; + if ((buf = malloc(len)) == NULL) testutil_die(ENOMEM, "Failed to allocate memory"); /* mkdir shares syntax between Windows and Linux */ - snprintf(buffer, inputSize, "%s%s", MKDIR_COMMAND, dir); - if ((ret = system(buffer)) != 0) - testutil_die(ret, "directory create call of '%s%s' failed", - MKDIR_COMMAND, dir); - free(buffer); + snprintf(buf, len, "%s%s", MKDIR_COMMAND, dir); + if ((ret = system(buf)) != 0) + testutil_die(ret, "%s", buf); + free(buf); } |